From 0a16628212b8b0daac643b6a0f1caa4e9482afc8 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Wed, 12 Jul 2017 10:32:48 +0100
Subject: [PATCH 01/76] brcmfmac: fix regression in brcmf_sdio_txpkt_hdalign()

Recent change in brcmf_sdio_txpkt_hdalign() changed the
behavior and now always returns 0. This resulted in a
regression which basically renders the device useless.

Fixes: 270a6c1f65fe ("brcmfmac: rework headroom check in .start_xmit()")
Reported-by: S. Gilles <sgilles@math.umd.edu>
Tested-by: S. Gilles <sgilles@math.umd.edu>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index fbcbb4325936..c3ecec673eb7 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -2053,12 +2053,13 @@ static int brcmf_sdio_txpkt_hdalign(struct brcmf_sdio *bus, struct sk_buff *pkt)
 				atomic_inc(&stats->pktcow_failed);
 				return -ENOMEM;
 			}
+			head_pad = 0;
 		}
 		skb_push(pkt, head_pad);
 		dat_buf = (u8 *)(pkt->data);
 	}
 	memset(dat_buf, 0, head_pad + bus->tx_hdrlen);
-	return 0;
+	return head_pad;
 }
 
 /**

From 271612d72da5b46715447bc18add4a1cf7d87687 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Mon, 17 Jul 2017 09:34:19 -0500
Subject: [PATCH 02/76] Revert "rtlwifi: btcoex: rtl8723be: fix ant_sel not
 work"

This reverts commit f95d95a7cd5514549dcf6ba754f0ee834cce3e1f.

With commit f95d95a7cd55 ("rtlwifi: btcoex: rtl8723be: fix ant_sel not
work"), the kernel has a NULL pointer dereference oops. This content and
the proper fix will be included in a later patch.

Fixes: f95d95a7cd55 ("rtlwifi: btcoex: rtl8723be: fix ant_sel not work")
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Ping-Ke Shih <pkshih@realtek.com>
Cc: Yan-Hsuan Chuang <yhchuang@realtek.com>
Cc: Birming Chiu <birming@realtek.com>
Cc: Shaofu <shaofu@realtek.com>
Cc: Steven Ting <steventing@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 3 ---
 drivers/net/wireless/realtek/rtlwifi/wifi.h         | 1 -
 2 files changed, 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
index 2a7ad5ffe997..cd5dc6dcb19f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
@@ -846,9 +846,6 @@ static bool _rtl8723be_init_mac(struct ieee80211_hw *hw)
 		return false;
 	}
 
-	if (rtlpriv->cfg->ops->get_btc_status())
-		rtlpriv->btcoexist.btc_ops->btc_power_on_setting(rtlpriv);
-
 	bytetmp = rtl_read_byte(rtlpriv, REG_MULTI_FUNC_CTRL);
 	rtl_write_byte(rtlpriv, REG_MULTI_FUNC_CTRL, bytetmp | BIT(3));
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index fb1ebb01133f..70723e67b7d7 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -2547,7 +2547,6 @@ struct bt_coexist_info {
 struct rtl_btc_ops {
 	void (*btc_init_variables) (struct rtl_priv *rtlpriv);
 	void (*btc_init_hal_vars) (struct rtl_priv *rtlpriv);
-	void (*btc_power_on_setting)(struct rtl_priv *rtlpriv);
 	void (*btc_init_hw_config) (struct rtl_priv *rtlpriv);
 	void (*btc_ips_notify) (struct rtl_priv *rtlpriv, u8 type);
 	void (*btc_lps_notify)(struct rtl_priv *rtlpriv, u8 type);

From 0b0f934e92a8eaed2e6c48a50eae6f84661f74f3 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 8 Jun 2017 10:55:26 +0300
Subject: [PATCH 03/76] iwlwifi: dvm: prevent an out of bounds access

iwlagn_check_ratid_empty takes the tid as a parameter, but
it doesn't check that it is not IWL_TID_NON_QOS.
Since IWL_TID_NON_QOS = 8 and iwl_priv::tid_data is an array
with 8 entries, accessing iwl_priv::tid_data[IWL_TID_NON_QOS]
is a bad idea.
This happened in iwlagn_rx_reply_tx. Since
iwlagn_check_ratid_empty is relevant only to check whether
we can open A-MPDU, this flow is irrelevant if tid is
IWL_TID_NON_QOS. Call iwlagn_check_ratid_empty only inside
the
	if (tid != IWL_TID_NON_QOS)

a few lines earlier in the function.

Cc: <stable@vger.kernel.org>
Reported-by: Seraphime Kirkovski <kirkseraph@gmail.com>
Tested-by: Seraphime Kirkovski <kirkseraph@gmail.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/dvm/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c
index adaa2f0097cc..fb40ddfced99 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c
@@ -1189,11 +1189,11 @@ void iwlagn_rx_reply_tx(struct iwl_priv *priv, struct iwl_rx_cmd_buffer *rxb)
 				next_reclaimed;
 			IWL_DEBUG_TX_REPLY(priv, "Next reclaimed packet:%d\n",
 						  next_reclaimed);
+			iwlagn_check_ratid_empty(priv, sta_id, tid);
 		}
 
 		iwl_trans_reclaim(priv->trans, txq_id, ssn, &skbs);
 
-		iwlagn_check_ratid_empty(priv, sta_id, tid);
 		freed = 0;
 
 		/* process frames */

From f6eac740a9b6f3737a969bad82931633519a1cc5 Mon Sep 17 00:00:00 2001
From: Mordechai Goodstein <mordechay.goodstein@intel.com>
Date: Sun, 11 Jun 2017 18:00:36 +0300
Subject: [PATCH 04/76] iwlwifi: pcie: fix unused txq NULL pointer dereference

Before TVQM, all TX queues were allocated straight at init.
With TVQM, queues are allocated on demand and hence we need
to check if a queue exists before dereferencing it.

Fixes: 66128fa08806 ("iwlwifi: move to TVQM mode")
Signed-off-by: Mordechai Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index de50418adae5..034bdb4a0b06 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -298,6 +298,9 @@ void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
 	for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) {
 		struct iwl_txq *txq = trans_pcie->txq[i];
 
+		if (!test_bit(i, trans_pcie->queue_used))
+			continue;
+
 		spin_lock_bh(&txq->lock);
 		if (txq->need_update) {
 			iwl_pcie_txq_inc_wr_ptr(trans, txq);

From 61dd8a8a6a0c3cbfb6b02ab652c4f4efb93f3d79 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 12 Jun 2017 15:10:09 +0300
Subject: [PATCH 05/76] iwlwifi: mvm: fix a NULL pointer dereference of error
 in recovery

Sometimes, we can have an firmware crash while trying to
recover from a previous firmware problem.
When that happens, lots of things can go wrong. For example
the stations don't get added properly to mvm->fw_id_to_mac_id.

Mac80211 tries to stop A-MPDU upon reconfig but in case of
a firmware crash we will bail out fairly early and in the
end, we won't delete the A-MPDU Rx timeout.
When that timer expired after a double firmware crash,
we end up dereferencing mvm->fw_id_to_mac_id[sta_id]
which is NULL.

Fixes: 10b2b2019d81 ("iwlwifi: mvm: add infrastructure for tracking BA session in driver")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 4df5f13fcdae..4a6df45b73df 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -277,6 +277,18 @@ static void iwl_mvm_rx_agg_session_expired(unsigned long data)
 
 	/* Timer expired */
 	sta = rcu_dereference(ba_data->mvm->fw_id_to_mac_id[ba_data->sta_id]);
+
+	/*
+	 * sta should be valid unless the following happens:
+	 * The firmware asserts which triggers a reconfig flow, but
+	 * the reconfig fails before we set the pointer to sta into
+	 * the fw_id_to_mac_id pointer table. Mac80211 can't stop
+	 * A-MDPU and hence the timer continues to run. Then, the
+	 * timer expires and sta is NULL.
+	 */
+	if (!sta)
+		goto unlock;
+
 	mvm_sta = iwl_mvm_sta_from_mac80211(sta);
 	ieee80211_stop_rx_ba_session_offl(mvm_sta->vif,
 					  sta->addr, ba_data->tid);

From 2388bd7b133504fa0991f483db66fad3a0de8694 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 14 Jun 2017 13:44:51 +0300
Subject: [PATCH 06/76] iwlwifi: missing error code in iwl_trans_pcie_alloc()

We don't set the error code here so we end up returning ERR_PTR(0) which
is NULL.  The caller doesn't expect that so it results in a NULL
dereference.

Fixes: 2e5d4a8f61dc ("iwlwifi: pcie: Add new configuration to enable MSIX")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 92b3a55d0fbc..f95eec52508e 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -3150,7 +3150,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
 	init_waitqueue_head(&trans_pcie->d0i3_waitq);
 
 	if (trans_pcie->msix_enabled) {
-		if (iwl_pcie_init_msix_handler(pdev, trans_pcie))
+		ret = iwl_pcie_init_msix_handler(pdev, trans_pcie);
+		if (ret)
 			goto out_no_pci;
 	 } else {
 		ret = iwl_pcie_alloc_ict(trans);

From 5462bcd8c9dc7e7ff2dd54c3f1bb5a9a729f7a73 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 14 Jun 2017 12:21:05 +0300
Subject: [PATCH 07/76] iwlwifi: fix tracing when tx only is enabled

iwl_trace_data is somewhat confusing. It returns a bool
that tells if the payload of the skb should be added to
the tx_data event. If it returns false, then the payload
of the skb is added to the tx event.

The purpose is to be able to start tracing with
-e iwlwifi
and record non-data packets only which saves bandwidth.

Since EAPOLs are important, seldom and not real data
packet (despite being WiFi data packets), they are
included in tx event and thus iwl_trace_data returns false
on those. This last part was buggy, and because of that,
all the data packets were included in the tx event.

Fix that.

Fixes: 0c4cb7314d15 ("iwlwifi: tracing: decouple from mac80211")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-devtrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.h b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.h
index 545d14b0bc92..f5c1127253cb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.h
@@ -55,8 +55,8 @@ static inline bool iwl_trace_data(struct sk_buff *skb)
 	/* also account for the RFC 1042 header, of course */
 	offs += 6;
 
-	return skb->len > offs + 2 &&
-	       *(__be16 *)(skb->data + offs) == cpu_to_be16(ETH_P_PAE);
+	return skb->len <= offs + 2 ||
+		*(__be16 *)(skb->data + offs) != cpu_to_be16(ETH_P_PAE);
 }
 
 static inline size_t iwl_rx_trace_len(const struct iwl_trans *trans,

From 7b758a111819006ba64dd23aa016d42a20ba8557 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Tue, 20 Jun 2017 13:40:03 +0300
Subject: [PATCH 08/76] iwlwifi: mvm: handle IBSS probe_queue in a few missing
 places

When IBSS was implemented for DQA, we missid a few places where it
should be handled in the same way as AP.

Fixes: ee48b72211f8 ("iwlwifi: mvm: support ibss in dqa mode")
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 4a6df45b73df..ab66b4394dfc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2027,7 +2027,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 						IWL_MAX_TID_COUNT,
 						wdg_timeout);
 
-		if (vif->type == NL80211_IFTYPE_AP)
+		if (vif->type == NL80211_IFTYPE_AP ||
+		    vif->type == NL80211_IFTYPE_ADHOC)
 			mvm->probe_queue = queue;
 		else if (vif->type == NL80211_IFTYPE_P2P_DEVICE)
 			mvm->p2p_dev_queue = queue;

From bf8b286f86fcc66d138fd992acfa37839340218d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 30 Jun 2017 10:48:28 +0200
Subject: [PATCH 09/76] iwlwifi: mvm: defer setting
 IWL_MVM_STATUS_IN_HW_RESTART
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A hardware/firmware error may happen at any point in time. In
particular, it might happen while mac80211 is in the middle of
a flow. We observed the following situation:
 * mac80211 is in authentication flow, in ieee80211_prep_connection()
 * iwlwifi firmware crashes, but no error can be reported at this
   precise point (mostly because the driver method is void, but even
   if it wasn't we'd just shift to a race condition)
 * mac80211 continues the flow, trying to add the AP station
 * iwlwifi has already set its internal restart flag, and so thinks
   that adding the station is part of the restart and already set up,
   so it uses the information that's supposed to already be in the
   struct

This can happen with any flow in mac80211 and with any information
we try to preserve across hardware restarts.

To fix this, only set a new HW_RESTART_REQUESTED flag and translate
that to IN_HW_RESTART once mac80211 actually starts the restart by
calling our start() method. As a consequence, any mac80211 flow in
progress at the time of the restart will properly finish (certainly
with errors), before the restart is attempted.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=195299.

Reported-by: djagoo <dev@djagoo.io>
Reported-by: Łukasz Siudut <lsiudut@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 +++++++-
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h      | 2 ++
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c      | 6 +++---
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index bcde1ba0f1c8..c7b1e58e3384 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1084,7 +1084,13 @@ int __iwl_mvm_mac_start(struct iwl_mvm *mvm)
 
 	lockdep_assert_held(&mvm->mutex);
 
-	if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
+	if (test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status)) {
+		/*
+		 * Now convert the HW_RESTART_REQUESTED flag to IN_HW_RESTART
+		 * so later code will - from now on - see that we're doing it.
+		 */
+		set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status);
+		clear_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status);
 		/* Clean up some internal and mac80211 state on restart */
 		iwl_mvm_restart_cleanup(mvm);
 	} else {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index eaacfaf37206..ddd8719f27b8 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1090,6 +1090,7 @@ struct iwl_mvm {
  * @IWL_MVM_STATUS_HW_RFKILL: HW RF-kill is asserted
  * @IWL_MVM_STATUS_HW_CTKILL: CT-kill is active
  * @IWL_MVM_STATUS_ROC_RUNNING: remain-on-channel is running
+ * @IWL_MVM_STATUS_HW_RESTART_REQUESTED: HW restart was requested
  * @IWL_MVM_STATUS_IN_HW_RESTART: HW restart is active
  * @IWL_MVM_STATUS_IN_D0I3: NIC is in D0i3
  * @IWL_MVM_STATUS_ROC_AUX_RUNNING: AUX remain-on-channel is running
@@ -1101,6 +1102,7 @@ enum iwl_mvm_status {
 	IWL_MVM_STATUS_HW_RFKILL,
 	IWL_MVM_STATUS_HW_CTKILL,
 	IWL_MVM_STATUS_ROC_RUNNING,
+	IWL_MVM_STATUS_HW_RESTART_REQUESTED,
 	IWL_MVM_STATUS_IN_HW_RESTART,
 	IWL_MVM_STATUS_IN_D0I3,
 	IWL_MVM_STATUS_ROC_AUX_RUNNING,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 4d1188b8736a..9c175d5e9d67 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -1235,9 +1235,8 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
 	 */
 	if (!mvm->fw_restart && fw_error) {
 		iwl_mvm_fw_dbg_collect_desc(mvm, &iwl_mvm_dump_desc_assert,
-					    NULL);
-	} else if (test_and_set_bit(IWL_MVM_STATUS_IN_HW_RESTART,
-				    &mvm->status)) {
+					NULL);
+	} else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
 		struct iwl_mvm_reprobe *reprobe;
 
 		IWL_ERR(mvm,
@@ -1268,6 +1267,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
 
 		if (fw_error && mvm->fw_restart > 0)
 			mvm->fw_restart--;
+		set_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status);
 		ieee80211_restart_hw(mvm->hw);
 	}
 }

From 6c7fce6fa86a110d6455662d823c4e09f8f7be4a Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Sat, 22 Jul 2017 20:45:55 +0800
Subject: [PATCH 10/76] net: ethernet: mediatek: avoid potential invalid memory
 access

Potential dangerous invalid memory might be accessed if invalid mac value
reflected from the forward port field in rxd4 caused by possible potential
hardware defects. So added a simple sanity checker to avoid the kind of
situation happening.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Acked-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b3d0c2e6347a..3e3232fd7c05 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -947,6 +947,10 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		      RX_DMA_FPORT_MASK;
 		mac--;
 
+		if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
+			     !eth->netdev[mac]))
+			goto release_desc;
+
 		netdev = eth->netdev[mac];
 
 		if (unlikely(test_bit(MTK_RESETTING, &eth->state)))

From 5edfbd3c0697e643f179dba819a7c09375b42543 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Thu, 20 Jul 2017 02:41:34 -0700
Subject: [PATCH 11/76] tun/tap: Add the missed return value check of
 register_netdevice_notifier

There is some codes of tun/tap module which did not check the return
value of register_netdevice_notifier. Add the check now.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3d4c24572ecd..32ad87345f57 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2598,8 +2598,16 @@ static int __init tun_init(void)
 		goto err_misc;
 	}
 
-	register_netdevice_notifier(&tun_notifier_block);
+	ret = register_netdevice_notifier(&tun_notifier_block);
+	if (ret) {
+		pr_err("Can't register netdevice notifier\n");
+		goto err_notifier;
+	}
+
 	return  0;
+
+err_notifier:
+	misc_deregister(&tun_miscdev);
 err_misc:
 	rtnl_link_unregister(&tun_link_ops);
 err_linkops:

From 70dba204a396c1c094749c4ef9bd27d5e8176a08 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 20 Jul 2017 11:06:31 +0100
Subject: [PATCH 12/76] net: ethernet: mediatek: Explicitly include
 linux/interrupt.h

The mediatek ethernet driver uses interrupts but does not explicitly
include linux/interrupt.h, relying on implicit includes.  Fix this so we
don't get build breaks as happened for ARM in next-20170720.

Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 3e3232fd7c05..e588a0cdb074 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -22,6 +22,7 @@
 #include <linux/if_vlan.h>
 #include <linux/reset.h>
 #include <linux/tcp.h>
+#include <linux/interrupt.h>
 
 #include "mtk_eth_soc.h"
 

From 545722cb0fc993226a01844fb27cf832459eb1c0 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Fri, 21 Jul 2017 14:36:57 +0100
Subject: [PATCH 13/76] selftests/bpf: subtraction bounds test

There is a bug in the verifier's handling of BPF_SUB: [a,b] - [c,d] yields
 was [a-c, b-d] rather than the correct [a-d, b-c].  So here is a test
 which, with the bogus handling, will produce ranges of [0,0] and thus
 allowed accesses; whereas the correct handling will give a range of
 [-255, 255] (and hence the right-shift will give a range of [0, 255]) and
 the accesses will be rejected.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 28 +++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index af7d173910f4..addea82f76c9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5980,6 +5980,34 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.result_unpriv = REJECT,
 	},
+	{
+		"subtraction bounds (map value)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7),
+			BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)

From 9305706c2e808ae59f1eb201867f82f1ddf6d7a6 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Fri, 21 Jul 2017 14:37:34 +0100
Subject: [PATCH 14/76] bpf/verifier: fix min/max handling in BPF_SUB

We have to subtract the src max from the dst min, and vice-versa, since
 (e.g.) the smallest result comes from the largest subtrahend.

Fixes: 484611357c19 ("bpf: allow access into map value arrays")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index af9e84a4944e..664d93972373 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1865,10 +1865,12 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	 * do our normal operations to the register, we need to set the values
 	 * to the min/max since they are undefined.
 	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE)
-		dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-	if (max_val == BPF_REGISTER_MAX_RANGE)
-		dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+	if (opcode != BPF_SUB) {
+		if (min_val == BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		if (max_val == BPF_REGISTER_MAX_RANGE)
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+	}
 
 	switch (opcode) {
 	case BPF_ADD:
@@ -1879,10 +1881,17 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 		dst_reg->min_align = min(src_align, dst_align);
 		break;
 	case BPF_SUB:
+		/* If one of our values was at the end of our ranges, then the
+		 * _opposite_ value in the dst_reg goes to the end of our range.
+		 */
+		if (min_val == BPF_REGISTER_MIN_RANGE)
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+		if (max_val == BPF_REGISTER_MAX_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-			dst_reg->min_value -= min_val;
+			dst_reg->min_value -= max_val;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
-			dst_reg->max_value -= max_val;
+			dst_reg->max_value -= min_val;
 		dst_reg->min_align = min(src_align, dst_align);
 		break;
 	case BPF_MUL:

From e859afe1ee0c5ae981c55387ccd45eba258a7842 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Fri, 21 Jul 2017 16:51:31 +0200
Subject: [PATCH 15/76] lib: test_rhashtable: fix for large entry counts

During concurrent access testing, threadfunc() concatenated thread ID
and object index to create a unique key like so:

| tdata->objs[i].value = (tdata->id << 16) | i;

This breaks if a user passes an entries parameter of 64k or higher,
since 'i' might use more than 16 bits then. Effectively, this will lead
to duplicate keys in the table.

Fix the problem by introducing a struct holding object and thread ID and
using that as key instead of a single integer type field.

Fixes: f4a3e90ba5739 ("rhashtable-test: extend to test concurrency")
Reported by: Manuel Messner <mm@skelett.io>
Signed-off-by: Phil Sutter <phil@nwl.cc>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_rhashtable.c | 53 ++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 64e899b63337..16949d219291 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -56,8 +56,13 @@ static bool enomem_retry = false;
 module_param(enomem_retry, bool, 0);
 MODULE_PARM_DESC(enomem_retry, "Retry insert even if -ENOMEM was returned (default: off)");
 
+struct test_obj_val {
+	int	id;
+	int	tid;
+};
+
 struct test_obj {
-	int			value;
+	struct test_obj_val	value;
 	struct rhash_head	node;
 };
 
@@ -72,7 +77,7 @@ static struct test_obj array[MAX_ENTRIES];
 static struct rhashtable_params test_rht_params = {
 	.head_offset = offsetof(struct test_obj, node),
 	.key_offset = offsetof(struct test_obj, value),
-	.key_len = sizeof(int),
+	.key_len = sizeof(struct test_obj_val),
 	.hashfn = jhash,
 	.nulls_base = (3U << RHT_BASE_SHIFT),
 };
@@ -109,24 +114,26 @@ static int __init test_rht_lookup(struct rhashtable *ht)
 	for (i = 0; i < entries * 2; i++) {
 		struct test_obj *obj;
 		bool expected = !(i % 2);
-		u32 key = i;
+		struct test_obj_val key = {
+			.id = i,
+		};
 
-		if (array[i / 2].value == TEST_INSERT_FAIL)
+		if (array[i / 2].value.id == TEST_INSERT_FAIL)
 			expected = false;
 
 		obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
 
 		if (expected && !obj) {
-			pr_warn("Test failed: Could not find key %u\n", key);
+			pr_warn("Test failed: Could not find key %u\n", key.id);
 			return -ENOENT;
 		} else if (!expected && obj) {
 			pr_warn("Test failed: Unexpected entry found for key %u\n",
-				key);
+				key.id);
 			return -EEXIST;
 		} else if (expected && obj) {
-			if (obj->value != i) {
+			if (obj->value.id != i) {
 				pr_warn("Test failed: Lookup value mismatch %u!=%u\n",
-					obj->value, i);
+					obj->value.id, i);
 				return -EINVAL;
 			}
 		}
@@ -195,7 +202,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 	for (i = 0; i < entries; i++) {
 		struct test_obj *obj = &array[i];
 
-		obj->value = i * 2;
+		obj->value.id = i * 2;
 		err = insert_retry(ht, &obj->node, test_rht_params);
 		if (err > 0)
 			insert_retries += err;
@@ -218,7 +225,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 	for (i = 0; i < entries; i++) {
 		u32 key = i * 2;
 
-		if (array[i].value != TEST_INSERT_FAIL) {
+		if (array[i].value.id != TEST_INSERT_FAIL) {
 			obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
 			BUG_ON(!obj);
 
@@ -242,18 +249,21 @@ static int thread_lookup_test(struct thread_data *tdata)
 
 	for (i = 0; i < entries; i++) {
 		struct test_obj *obj;
-		int key = (tdata->id << 16) | i;
+		struct test_obj_val key = {
+			.id = i,
+			.tid = tdata->id,
+		};
 
 		obj = rhashtable_lookup_fast(&ht, &key, test_rht_params);
-		if (obj && (tdata->objs[i].value == TEST_INSERT_FAIL)) {
-			pr_err("  found unexpected object %d\n", key);
+		if (obj && (tdata->objs[i].value.id == TEST_INSERT_FAIL)) {
+			pr_err("  found unexpected object %d-%d\n", key.tid, key.id);
 			err++;
-		} else if (!obj && (tdata->objs[i].value != TEST_INSERT_FAIL)) {
-			pr_err("  object %d not found!\n", key);
+		} else if (!obj && (tdata->objs[i].value.id != TEST_INSERT_FAIL)) {
+			pr_err("  object %d-%d not found!\n", key.tid, key.id);
 			err++;
-		} else if (obj && (obj->value != key)) {
-			pr_err("  wrong object returned (got %d, expected %d)\n",
-			       obj->value, key);
+		} else if (obj && memcmp(&obj->value, &key, sizeof(key))) {
+			pr_err("  wrong object returned (got %d-%d, expected %d-%d)\n",
+			       obj->value.tid, obj->value.id, key.tid, key.id);
 			err++;
 		}
 
@@ -272,7 +282,8 @@ static int threadfunc(void *data)
 		pr_err("  thread[%d]: down_interruptible failed\n", tdata->id);
 
 	for (i = 0; i < entries; i++) {
-		tdata->objs[i].value = (tdata->id << 16) | i;
+		tdata->objs[i].value.id = i;
+		tdata->objs[i].value.tid = tdata->id;
 		err = insert_retry(&ht, &tdata->objs[i].node, test_rht_params);
 		if (err > 0) {
 			insert_retries += err;
@@ -295,7 +306,7 @@ static int threadfunc(void *data)
 
 	for (step = 10; step > 0; step--) {
 		for (i = 0; i < entries; i += step) {
-			if (tdata->objs[i].value == TEST_INSERT_FAIL)
+			if (tdata->objs[i].value.id == TEST_INSERT_FAIL)
 				continue;
 			err = rhashtable_remove_fast(&ht, &tdata->objs[i].node,
 			                             test_rht_params);
@@ -304,7 +315,7 @@ static int threadfunc(void *data)
 				       tdata->id);
 				goto out;
 			}
-			tdata->objs[i].value = TEST_INSERT_FAIL;
+			tdata->objs[i].value.id = TEST_INSERT_FAIL;
 
 			cond_resched();
 		}

From 1819ae3dfebe7bb2ade39dbf04518448c922dc51 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 21 Jul 2017 18:04:28 +0200
Subject: [PATCH 16/76] mlxsw: spectrum_router: Don't offload routes next in
 list

Each FIB node holds a linked list of routes sharing the same prefix and
length. In the case of IPv4 it's ordered according to table ID, metric
and TOS and only the first route in the list is actually programmed to
the device.

In case a gatewayed route is added somewhere in the list, then after its
nexthop group will be refreshed and become valid (due to the resolution
of its gateway), it'll mistakenly overwrite the existing entry.

Example:
192.168.200.0/24 dev enp3s0np3 scope link metric 1000 offload
192.168.200.0/24 via 192.168.100.1 dev enp3s0np3 metric 1000 offload

Both routes are marked as offloaded despite the fact only the first one
should actually be present in the device's table.

When refreshing the nexthop group, don't write the route to the device's
table unless it's the first in its node.

Fixes: 9aecce1c7d97 ("mlxsw: spectrum_router: Correctly handle identical routes")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 383fef5a8e24..4b2e0fd7d51e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1512,6 +1512,10 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_fib_entry *fib_entry);
 
+static bool
+mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
+				 const struct mlxsw_sp_fib_entry *fib_entry);
+
 static int
 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_nexthop_group *nh_grp)
@@ -1520,6 +1524,9 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
 	int err;
 
 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
+						      fib_entry))
+			continue;
 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
 		if (err)
 			return err;

From 864d9664245565a6b9df86a68c7664a25a4fcd58 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 21 Jul 2017 18:49:45 +0200
Subject: [PATCH 17/76] net/socket: fix type in assignment and trim long line

The commit ffb07550c76f ("copy_msghdr_from_user(): get rid of
field-by-field copyin") introduce a new sparse warning:

net/socket.c:1919:27: warning: incorrect type in assignment (different address spaces)
net/socket.c:1919:27:    expected void *msg_control
net/socket.c:1919:27:    got void [noderef] <asn:1>*[addressable] msg_control

and a line above 80 chars, let's fix them

Fixes: ffb07550c76f ("copy_msghdr_from_user(): get rid of field-by-field copyin")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index bf2122691fba..ad22df1ffbd1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1916,7 +1916,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 	if (copy_from_user(&msg, umsg, sizeof(*umsg)))
 		return -EFAULT;
 
-	kmsg->msg_control = msg.msg_control;
+	kmsg->msg_control = (void __force *)msg.msg_control;
 	kmsg->msg_controllen = msg.msg_controllen;
 	kmsg->msg_flags = msg.msg_flags;
 
@@ -1935,7 +1935,8 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 
 	if (msg.msg_name && kmsg->msg_namelen) {
 		if (!save_addr) {
-			err = move_addr_to_kernel(msg.msg_name, kmsg->msg_namelen,
+			err = move_addr_to_kernel(msg.msg_name,
+						  kmsg->msg_namelen,
 						  kmsg->msg_name);
 			if (err < 0)
 				return err;

From f4458b92d2190757bef81a5f282d2644ccbaec23 Mon Sep 17 00:00:00 2001
From: Thor Thayer <thor.thayer@linux.intel.com>
Date: Fri, 21 Jul 2017 16:35:09 -0500
Subject: [PATCH 18/76] net: stmmac: Adjust dump offset of DMA registers for
 ethtool

The commit fbf68229ffe7 ("net: stmmac: unify registers dumps methods")

in the Linux kernel modified the register dump to store the DMA registers
at the DMA register offset (0x1000) but ethtool (stmmac.c) looks for the
DMA registers after the MAC registers which is offset 55.
This patch copies the DMA registers from the higher offset to the offset
where ethtool expects them.

Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c  | 2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c   | 2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h      | 3 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 5 +++++
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 22cf6353ba04..7ecf549c7f1c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -205,7 +205,7 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
 {
 	int i;
 
-	for (i = 0; i < 23; i++)
+	for (i = 0; i < NUM_DWMAC1000_DMA_REGS; i++)
 		if ((i < 12) || (i > 17))
 			reg_space[DMA_BUS_MODE / 4 + i] =
 				readl(ioaddr + DMA_BUS_MODE + i * 4);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index eef2f222ce9a..6502b9aa3bf5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -70,7 +70,7 @@ static void dwmac100_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
 {
 	int i;
 
-	for (i = 0; i < 9; i++)
+	for (i = 0; i < NUM_DWMAC100_DMA_REGS; i++)
 		reg_space[DMA_BUS_MODE / 4 + i] =
 			readl(ioaddr + DMA_BUS_MODE + i * 4);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index 9091df86723a..adc54006f884 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -136,6 +136,9 @@
 #define DMA_STATUS_TI	0x00000001	/* Transmit Interrupt */
 #define DMA_CONTROL_FTF		0x00100000	/* Flush transmit FIFO */
 
+#define NUM_DWMAC100_DMA_REGS	9
+#define NUM_DWMAC1000_DMA_REGS	23
+
 void dwmac_enable_dma_transmission(void __iomem *ioaddr);
 void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan);
 void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index babb39c646ff..af30b4857c3b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -33,6 +33,8 @@
 #define MAC100_ETHTOOL_NAME	"st_mac100"
 #define GMAC_ETHTOOL_NAME	"st_gmac"
 
+#define ETHTOOL_DMA_OFFSET	55
+
 struct stmmac_stats {
 	char stat_string[ETH_GSTRING_LEN];
 	int sizeof_stat;
@@ -442,6 +444,9 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
 
 	priv->hw->mac->dump_regs(priv->hw, reg_space);
 	priv->hw->dma->dump_regs(priv->ioaddr, reg_space);
+	/* Copy DMA registers to where ethtool expects them */
+	memcpy(&reg_space[ETHTOOL_DMA_OFFSET], &reg_space[DMA_BUS_MODE / 4],
+	       NUM_DWMAC1000_DMA_REGS * 4);
 }
 
 static void

From 9476d393667968b4a02afbe9d35a3558482b943e Mon Sep 17 00:00:00 2001
From: Thomas Jarosch <thomas.jarosch@intra2net.com>
Date: Sat, 22 Jul 2017 17:14:34 +0200
Subject: [PATCH 19/76] mcs7780: Fix initialization when CONFIG_VMAP_STACK is
 enabled

DMA transfers are not allowed to buffers that are on the stack.
Therefore allocate a buffer to store the result of usb_control_message().

Fixes these bugreports:
https://bugzilla.kernel.org/show_bug.cgi?id=195217

https://bugzilla.redhat.com/show_bug.cgi?id=1421387
https://bugzilla.redhat.com/show_bug.cgi?id=1427398

Shortened kernel backtrace from 4.11.9-200.fc25.x86_64:
kernel: ------------[ cut here ]------------
kernel: WARNING: CPU: 3 PID: 2957 at drivers/usb/core/hcd.c:1587
kernel: transfer buffer not dma capable
kernel: Call Trace:
kernel: dump_stack+0x63/0x86
kernel: __warn+0xcb/0xf0
kernel: warn_slowpath_fmt+0x5a/0x80
kernel: usb_hcd_map_urb_for_dma+0x37f/0x570
kernel: ? try_to_del_timer_sync+0x53/0x80
kernel: usb_hcd_submit_urb+0x34e/0xb90
kernel: ? schedule_timeout+0x17e/0x300
kernel: ? del_timer_sync+0x50/0x50
kernel: ? __slab_free+0xa9/0x300
kernel: usb_submit_urb+0x2f4/0x560
kernel: ? urb_destroy+0x24/0x30
kernel: usb_start_wait_urb+0x6e/0x170
kernel: usb_control_msg+0xdc/0x120
kernel: mcs_get_reg+0x36/0x40 [mcs7780]
kernel: mcs_net_open+0xb5/0x5c0 [mcs7780]
...

Regression goes back to 4.9, so it's a good candidate for -stable.
Though it's the decision of the maintainer.

Thanks to Dan Williams for adding the "transfer buffer not dma capable"
warning in the first place. It instantly pointed me in the right direction.

Patch has been tested with transferring data from a Polar watch.

Signed-off-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/mcs7780.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 6f6ed75b63c9..765de3bedb88 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mcs, __u16 reg, __u16 val)
 static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val)
 {
 	struct usb_device *dev = mcs->usbdev;
-	int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
-				  MCS_RD_RTYPE, 0, reg, val, 2,
-				  msecs_to_jiffies(MCS_CTRL_TIMEOUT));
+	void *dmabuf;
+	int ret;
+
+	dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL);
+	if (!dmabuf)
+		return -ENOMEM;
+
+	ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+			      MCS_RD_RTYPE, 0, reg, dmabuf, 2,
+			      msecs_to_jiffies(MCS_CTRL_TIMEOUT));
+
+	memcpy(val, dmabuf, sizeof(__u16));
+	kfree(dmabuf);
 
 	return ret;
 }

From 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 23 Jul 2017 17:52:23 +0800
Subject: [PATCH 20/76] openvswitch: fix potential out of bound access in
 parse_ct

Before the 'type' is validated, we shouldn't use it to fetch the
ovs_ct_attr_lens's minlen and maxlen, else, out of bound access
may happen.

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e3c4c6c3fef7..03859e386b47 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1310,8 +1310,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
 
 	nla_for_each_nested(a, attr, rem) {
 		int type = nla_type(a);
-		int maxlen = ovs_ct_attr_lens[type].maxlen;
-		int minlen = ovs_ct_attr_lens[type].minlen;
+		int maxlen;
+		int minlen;
 
 		if (type > OVS_CT_ATTR_MAX) {
 			OVS_NLERR(log,
@@ -1319,6 +1319,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
 				  type, OVS_CT_ATTR_MAX);
 			return -EINVAL;
 		}
+
+		maxlen = ovs_ct_attr_lens[type].maxlen;
+		minlen = ovs_ct_attr_lens[type].minlen;
 		if (nla_len(a) < minlen || nla_len(a) > maxlen) {
 			OVS_NLERR(log,
 				  "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",

From c7472ec4a00c298d2b55c0aa945b8a4c10f6f898 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 24 Jul 2017 16:59:01 +1000
Subject: [PATCH 21/76] ftgmac100: Increase reset timeout

We had reports of 50us not being sufficient to reset the MAC,
thus hitting the "Hardware reset failed" error bringing the
interface up on some AST2400 based machines.

This bumps the timeout to 200us.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 95bf5e89cfd1..bfda0b2e2b82 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -125,7 +125,7 @@ static int ftgmac100_reset_mac(struct ftgmac100 *priv, u32 maccr)
 	iowrite32(maccr, priv->base + FTGMAC100_OFFSET_MACCR);
 	iowrite32(maccr | FTGMAC100_MACCR_SW_RST,
 		  priv->base + FTGMAC100_OFFSET_MACCR);
-	for (i = 0; i < 50; i++) {
+	for (i = 0; i < 200; i++) {
 		unsigned int maccr;
 
 		maccr = ioread32(priv->base + FTGMAC100_OFFSET_MACCR);

From d57b9db1ae0cde366c9cc6b038cb1e8ff05557a1 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 24 Jul 2017 16:59:07 +1000
Subject: [PATCH 22/76] ftgmac100: Make the MDIO bus a child of the ethernet
 device

Populate mii_bus->parent with our own platform device before
registering, which makes it easier to locate the MDIO bus
in sysfs when trying to diagnose problems.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index bfda0b2e2b82..cdb979440ca5 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1682,6 +1682,7 @@ static int ftgmac100_setup_mdio(struct net_device *netdev)
 	priv->mii_bus->name = "ftgmac100_mdio";
 	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d",
 		 pdev->name, pdev->id);
+	priv->mii_bus->parent = priv->dev;
 	priv->mii_bus->priv = priv->netdev;
 	priv->mii_bus->read = ftgmac100_mdiobus_read;
 	priv->mii_bus->write = ftgmac100_mdiobus_write;

From c800aaf8d869f2b9b47b10c5c312fe19f0a94042 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 24 Jul 2017 10:07:32 -0700
Subject: [PATCH 23/76] packet: fix use-after-free in
 prb_retire_rx_blk_timer_expired()

There are multiple reports showing we have a use-after-free in
the timer prb_retire_rx_blk_timer_expired(), where we use struct
tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by
free_pg_vec().

The interesting part is it is not freed via packet_release() but
via packet_setsockopt(), which means we are not closing the socket.
Looking into the big and fat function packet_set_ring(), this could
happen if we satisfy the following conditions:

1. closing == 0, not on packet_release() path
2. req->tp_block_nr == 0, we don't allocate a new pg_vec
3. rx_ring->pg_vec is already set as V3, which means we already called
   packet_set_ring() wtih req->tp_block_nr > 0 previously
4. req->tp_frame_nr == 0, pass sanity check
5. po->mapped == 0, never called mmap()

In this scenario we are clearing the old rx_ring->pg_vec, so we need
to free this pg_vec, but we don't stop the timer on this path because
of closing==0.

The timer has to be stopped as long as we need to free pg_vec, therefore
the check on closing!=0 is wrong, we should check pg_vec!=NULL instead.

Thanks to liujian for testing different fixes.

Reported-by: alexander.levin@verizon.com
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Reported-by: liujian (CE) <liujian56@huawei.com>
Tested-by: liujian (CE) <liujian56@huawei.com>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 008bb34ee324..0615c2a950fa 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4329,7 +4329,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		register_prot_hook(sk);
 	}
 	spin_unlock(&po->bind_lock);
-	if (closing && (po->tp_version > TPACKET_V2)) {
+	if (pg_vec && (po->tp_version > TPACKET_V2)) {
 		/* Because we don't support block-based V3 on tx-ring */
 		if (!tx_ring)
 			prb_shutdown_retire_blk_timer(po, rb_queue);

From 9f9e772da2db279c8d8c9206d271b8009c9093f4 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 24 Jul 2017 10:49:23 -0700
Subject: [PATCH 24/76] net: dsa: Initialize ds->cpu_port_mask earlier

The mt7530 driver has its dsa_switch_ops::get_tag_protocol function
check ds->cpu_port_mask to issue a warning in case the configured CPU
port is not capable of supporting tags.

After commit 14be36c2c96c ("net: dsa: Initialize all CPU and enabled
ports masks in dsa_ds_parse()") we slightly re-arranged the
initialization such that this was no longer working. Just make sure that
ds->cpu_port_mask is set prior to the first call to get_tag_protocol,
thus restoring the expected contract. In case of error, the CPU port bit
is cleared.

Fixes: 14be36c2c96c ("net: dsa: Initialize all CPU and enabled ports masks in dsa_ds_parse()")
Reported-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa2.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 56e46090526b..c442051d5a55 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -509,21 +509,22 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
 		dst->cpu_dp->netdev = ethernet_dev;
 	}
 
-	tag_protocol = ds->ops->get_tag_protocol(ds);
-	dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
-	if (IS_ERR(dst->tag_ops)) {
-		dev_warn(ds->dev, "No tagger for this switch\n");
-		return PTR_ERR(dst->tag_ops);
-	}
-
-	dst->rcv = dst->tag_ops->rcv;
-
 	/* Initialize cpu_port_mask now for drv->setup()
 	 * to have access to a correct value, just like what
 	 * net/dsa/dsa.c::dsa_switch_setup_one does.
 	 */
 	ds->cpu_port_mask |= BIT(index);
 
+	tag_protocol = ds->ops->get_tag_protocol(ds);
+	dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+	if (IS_ERR(dst->tag_ops)) {
+		dev_warn(ds->dev, "No tagger for this switch\n");
+		ds->cpu_port_mask &= ~BIT(index);
+		return PTR_ERR(dst->tag_ops);
+	}
+
+	dst->rcv = dst->tag_ops->rcv;
+
 	return 0;
 }
 

From dce4551cb2adb1ac9a30f8ab5299d614392b3cff Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 Jul 2017 17:57:47 +0200
Subject: [PATCH 25/76] udp: preserve head state for IP_CMSG_PASSSEC

Paul Moore reported a SELinux/IP_PASSSEC regression
caused by missing skb->sp at recvmsg() time. We need to
preserve the skb head state to process the IP_CMSG_PASSSEC
cmsg.

With this commit we avoid releasing the skb head state in the
BH even if a secpath is attached to the current skb, and stores
the skb status (with/without head states) in the scratch area,
so that we can access it at skb deallocation time, without
incurring in cache-miss penalties.

This also avoids misusing the skb CB for ipv6 packets,
as introduced by the commit 0ddf3fb2c43d ("udp: preserve
skb->dst if required for IP options processing").

Clean a bit the scratch area helpers implementation, to
reduce the code differences between 32 and 64 bits build.

Reported-by: Paul Moore <paul@paul-moore.com>
Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue")
Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Tested-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 33 ++++++++++++++++++++----------
 net/ipv4/udp.c    | 52 +++++++++++++++++++++++------------------------
 2 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 972ce4baab6b..56ce2d2a612d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
  */
-#if BITS_PER_LONG == 64
-
-/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on
- * cold cache lines at recvmsg time.
- * skb->len can be stored on 16 bits since the udp header has been already
- * validated and pulled.
- */
 struct udp_dev_scratch {
-	u32 truesize;
+	/* skb->truesize and the stateless bit are embedded in a single field;
+	 * do not use a bitfield since the compiler emits better/smaller code
+	 * this way
+	 */
+	u32 _tsize_state;
+
+#if BITS_PER_LONG == 64
+	/* len and the bit needed to compute skb_csum_unnecessary
+	 * will be on cold cache lines at recvmsg time.
+	 * skb->len can be stored on 16 bits since the udp header has been
+	 * already validated and pulled.
+	 */
 	u16 len;
 	bool is_linear;
 	bool csum_unnecessary;
+#endif
 };
 
+static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb)
+{
+	return (struct udp_dev_scratch *)&skb->dev_scratch;
+}
+
+#if BITS_PER_LONG == 64
 static inline unsigned int udp_skb_len(struct sk_buff *skb)
 {
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
+	return udp_skb_scratch(skb)->len;
 }
 
 static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
 {
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
+	return udp_skb_scratch(skb)->csum_unnecessary;
 }
 
 static inline bool udp_skb_is_linear(struct sk_buff *skb)
 {
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
+	return udp_skb_scratch(skb)->is_linear;
 }
 
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b057653ceca9..d243772f6efc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1163,34 +1163,32 @@ out:
 	return ret;
 }
 
-#if BITS_PER_LONG == 64
+#define UDP_SKB_IS_STATELESS 0x80000000
+
 static void udp_set_dev_scratch(struct sk_buff *skb)
 {
-	struct udp_dev_scratch *scratch;
+	struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
 
 	BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
-	scratch = (struct udp_dev_scratch *)&skb->dev_scratch;
-	scratch->truesize = skb->truesize;
+	scratch->_tsize_state = skb->truesize;
+#if BITS_PER_LONG == 64
 	scratch->len = skb->len;
 	scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
 	scratch->is_linear = !skb_is_nonlinear(skb);
-}
-
-static int udp_skb_truesize(struct sk_buff *skb)
-{
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
-}
-#else
-static void udp_set_dev_scratch(struct sk_buff *skb)
-{
-	skb->dev_scratch = skb->truesize;
-}
-
-static int udp_skb_truesize(struct sk_buff *skb)
-{
-	return skb->dev_scratch;
-}
 #endif
+	if (likely(!skb->_skb_refdst))
+		scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
+}
+
+static int udp_skb_truesize(struct sk_buff *skb)
+{
+	return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
+}
+
+static bool udp_skb_has_head_state(struct sk_buff *skb)
+{
+	return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
+}
 
 /* fully reclaim rmem/fwd memory allocated for skb */
 static void udp_rmem_release(struct sock *sk, int size, int partial,
@@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		unlock_sock_fast(sk, slow);
 	}
 
-	/* we cleared the head states previously only if the skb lacks any IP
-	 * options, see __udp_queue_rcv_skb().
+	/* In the more common cases we cleared the head states previously,
+	 * see __udp_queue_rcv_skb().
 	 */
-	if (unlikely(IPCB(skb)->opt.optlen > 0))
+	if (unlikely(udp_skb_has_head_state(skb)))
 		skb_release_head_state(skb);
 	consume_stateless_skb(skb);
 }
@@ -1784,11 +1782,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id_once(sk, skb);
 	}
 
-	/* At recvmsg() time we need skb->dst to process IP options-related
-	 * cmsg, elsewhere can we clear all pending head states while they are
-	 * hot in the cache
+	/* At recvmsg() time we may access skb->dst or skb->sp depending on
+	 * the IP options and the cmsg flags, elsewhere can we clear all
+	 * pending head states while they are hot in the cache
 	 */
-	if (likely(IPCB(skb)->opt.optlen == 0))
+	if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp))
 		skb_release_head_state(skb);
 
 	rc = __udp_enqueue_schedule_skb(sk, skb);

From 2eaa38d9fcba5294182268b8d11770cf3fdc9bc9 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Tue, 25 Jul 2017 11:08:15 +0200
Subject: [PATCH 26/76] net: phy: Remove trailing semicolon in macro definition

Commit e5a03bfd873c2 ("phy: Add an mdio_device structure")
introduced a spurious trailing semicolon. Remove it.

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 2a9567bb8186..0bb5b212ab42 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -830,7 +830,7 @@ static inline int phy_read_status(struct phy_device *phydev)
 	dev_err(&_phydev->mdio.dev, format, ##args)
 
 #define phydev_dbg(_phydev, format, args...)	\
-	dev_dbg(&_phydev->mdio.dev, format, ##args);
+	dev_dbg(&_phydev->mdio.dev, format, ##args)
 
 static inline const char *phydev_name(const struct phy_device *phydev)
 {

From 783692558a60cd69d8d86900b33846263598ca6c Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Tue, 25 Jul 2017 13:36:21 +0200
Subject: [PATCH 27/76] lib: test_rhashtable: Fix KASAN warning

I forgot one spot when introducing struct test_obj_val.

Fixes: e859afe1ee0c5 ("lib: test_rhashtable: fix for large entry counts")
Reported by: kernel test robot <fengguang.wu@intel.com>
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_rhashtable.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 16949d219291..0ffca990a833 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -223,7 +223,9 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 
 	pr_info("  Deleting %d keys\n", entries);
 	for (i = 0; i < entries; i++) {
-		u32 key = i * 2;
+		struct test_obj_val key = {
+			.id = i * 2,
+		};
 
 		if (array[i].value.id != TEST_INSERT_FAIL) {
 			obj = rhashtable_lookup_fast(ht, &key, test_rht_params);

From afce615aaabfbaad02550e75c0bec106dafa1adf Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Mon, 24 Jul 2017 23:14:28 +0200
Subject: [PATCH 28/76] ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in
 ip6_fragment()

RFC 2465 defines ipv6IfStatsOutFragFails as:

	"The number of IPv6 datagrams that have been discarded
	 because they needed to be fragmented at this output
	 interface but could not be."

The existing implementation, instead, would increase the counter
twice in case we fail to allocate room for single fragments:
once for the fragment, once for the datagram.

This didn't look intentional though. In one of the two affected
affected failure paths, the double increase was simply a result
of a new 'goto fail' statement, introduced to avoid a skb leak.
The other path appears to be affected since at least 2.6.12-rc2.

Reported-by: Sabrina Dubroca <sdubroca@redhat.com>
Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1422d6c08377..162efba0d0cd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -673,8 +673,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		*prevhdr = NEXTHDR_FRAGMENT;
 		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 		if (!tmp_hdr) {
-			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				      IPSTATS_MIB_FRAGFAILS);
 			err = -ENOMEM;
 			goto fail;
 		}
@@ -789,8 +787,6 @@ slow_path:
 		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
 				 hroom + troom, GFP_ATOMIC);
 		if (!frag) {
-			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-				      IPSTATS_MIB_FRAGFAILS);
 			err = -ENOMEM;
 			goto fail;
 		}

From 6cee9d649cd1824a4d9ce6940dd716f2bf2ef24f Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Tue, 25 Jul 2017 10:19:01 +0930
Subject: [PATCH 29/76] ftgmac100: return error in ftgmac100_alloc_rx_buf

The error paths set err, but it's not returned.

I wondered if we should fix all of the callers to check the returned
value, but Ben explains why the code is this way:

> Most call sites ignore it on purpose. There's nothing we can do if
> we fail to get a buffer at interrupt time, so we point the buffer to
> the scratch page so the HW doesn't DMA into lalaland and lose the
> packet.
>
> The one call site that tests and can fail is the one used when brining
> the interface up. If we fail to allocate at that point, we fail the
> ifup. But as you noticed, I do have a bug not returning the error.

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index cdb979440ca5..34dae51effd4 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -392,7 +392,7 @@ static int ftgmac100_alloc_rx_buf(struct ftgmac100 *priv, unsigned int entry,
 	struct net_device *netdev = priv->netdev;
 	struct sk_buff *skb;
 	dma_addr_t map;
-	int err;
+	int err = 0;
 
 	skb = netdev_alloc_skb_ip_align(netdev, RX_BUF_SIZE);
 	if (unlikely(!skb)) {
@@ -428,7 +428,7 @@ static int ftgmac100_alloc_rx_buf(struct ftgmac100 *priv, unsigned int entry,
 	else
 		rxdes->rxdes0 = 0;
 
-	return 0;
+	return err;
 }
 
 static unsigned int ftgmac100_next_rx_pointer(struct ftgmac100 *priv,

From 80d887dbb673f007938c467fbfa118bba3e9f37d Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Mon, 24 Jul 2017 21:03:19 -0700
Subject: [PATCH 30/76] Revert "netvsc: optimize calculation of number of
 slots"

The logic for computing page buffer scatter does not take into
account the impact of compound pages. Therefore the optimization
to compute number of slots was incorrect and could cause stack
corruption a skb was sent with lots of fragments from huge pages.

This reverts commit 60b86665af0dfbeebda8aae43f0ba451cd2dcfe5.

Fixes: 60b86665af0d ("netvsc: optimize calculation of number of slots")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 43 +++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 63c98bbbc596..0d78727f1a14 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -315,14 +315,34 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
 	return slots_used;
 }
 
-/* Estimate number of page buffers neede to transmit
- * Need at most 2 for RNDIS header plus skb body and fragments.
- */
-static unsigned int netvsc_get_slots(const struct sk_buff *skb)
+static int count_skb_frag_slots(struct sk_buff *skb)
 {
-	return PFN_UP(offset_in_page(skb->data) + skb_headlen(skb))
-		+ skb_shinfo(skb)->nr_frags
-		+ 2;
+	int i, frags = skb_shinfo(skb)->nr_frags;
+	int pages = 0;
+
+	for (i = 0; i < frags; i++) {
+		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+		unsigned long size = skb_frag_size(frag);
+		unsigned long offset = frag->page_offset;
+
+		/* Skip unused frames from start of page */
+		offset &= ~PAGE_MASK;
+		pages += PFN_UP(offset + size);
+	}
+	return pages;
+}
+
+static int netvsc_get_slots(struct sk_buff *skb)
+{
+	char *data = skb->data;
+	unsigned int offset = offset_in_page(data);
+	unsigned int len = skb_headlen(skb);
+	int slots;
+	int frag_slots;
+
+	slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+	frag_slots = count_skb_frag_slots(skb);
+	return slots + frag_slots;
 }
 
 static u32 net_checksum_info(struct sk_buff *skb)
@@ -360,18 +380,21 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
 	struct hv_page_buffer *pb = page_buf;
 
-	/* We can only transmit MAX_PAGE_BUFFER_COUNT number
+	/* We will atmost need two pages to describe the rndis
+	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
 	 * of pages in a single packet. If skb is scattered around
 	 * more pages we try linearizing it.
 	 */
-	num_data_pgs = netvsc_get_slots(skb);
+
+	num_data_pgs = netvsc_get_slots(skb) + 2;
+
 	if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
 		++net_device_ctx->eth_stats.tx_scattered;
 
 		if (skb_linearize(skb))
 			goto no_memory;
 
-		num_data_pgs = netvsc_get_slots(skb);
+		num_data_pgs = netvsc_get_slots(skb) + 2;
 		if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
 			++net_device_ctx->eth_stats.tx_too_big;
 			goto drop;

From 4813497b537c6208c90d6cbecac5072d347de900 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Tue, 25 Jul 2017 14:35:03 +0200
Subject: [PATCH 31/76] net: ethernet: nb8800: Handle all 4 RGMII modes
 identically

Before commit bf8f6952a233 ("Add blurb about RGMII") it was unclear
whose responsibility it was to insert the required clock skew, and
in hindsight, some PHY drivers got it wrong. The solution forward
is to introduce a new property, explicitly requiring skew from the
node to which it is attached. In the interim, this driver will handle
all 4 RGMII modes identically (no skew).

Fixes: 52dfc8301248 ("net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller")
Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aurora/nb8800.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index 041cfb7952f8..e94159507847 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net_device *dev)
 		mac_mode |= HALF_DUPLEX;
 
 	if (gigabit) {
-		if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
+		if (phy_interface_is_rgmii(dev->phydev))
 			mac_mode |= RGMII_MODE;
 
 		mac_mode |= GMAC_MODE;
@@ -1268,11 +1268,10 @@ static int nb8800_tangox_init(struct net_device *dev)
 		break;
 
 	case PHY_INTERFACE_MODE_RGMII:
-		pad_mode = PAD_MODE_RGMII;
-		break;
-
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
 	case PHY_INTERFACE_MODE_RGMII_TXID:
-		pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
+		pad_mode = PAD_MODE_RGMII;
 		break;
 
 	default:

From 9688f9b020263c4a17471d09bb18e34eccc1c0a5 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 26 Jul 2017 11:33:49 +0200
Subject: [PATCH 32/76] udp: unbreak build lacking CONFIG_XFRM

We must use pre-processor conditional block or suitable accessors to
manipulate skb->sp elsewhere builds lacking the CONFIG_XFRM will break.

Fixes: dce4551cb2ad ("udp: preserve head state for IP_CMSG_PASSSEC")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d243772f6efc..fac7cb9e3b0f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1786,7 +1786,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 * the IP options and the cmsg flags, elsewhere can we clear all
 	 * pending head states while they are hot in the cache
 	 */
-	if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp))
+	if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
 		skb_release_head_state(skb);
 
 	rc = __udp_enqueue_schedule_skb(sk, skb);

From d94708a553022bf012fa95af10532a134eeb5a52 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 25 Jul 2017 09:44:25 -0700
Subject: [PATCH 33/76] bonding: commit link status change after propose

Commit de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring")
moves link status commitment into bond_mii_monitor(), but it still relies
on the return value of bond_miimon_inspect() as the hint. We need to return
non-zero as long as we propose a link status change.

Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring")
Reported-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
Tested-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 181839d6fbea..9bee6c1c70cc 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2050,6 +2050,7 @@ static int bond_miimon_inspect(struct bonding *bond)
 				continue;
 
 			bond_propose_link_state(slave, BOND_LINK_FAIL);
+			commit++;
 			slave->delay = bond->params.downdelay;
 			if (slave->delay) {
 				netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
@@ -2088,6 +2089,7 @@ static int bond_miimon_inspect(struct bonding *bond)
 				continue;
 
 			bond_propose_link_state(slave, BOND_LINK_BACK);
+			commit++;
 			slave->delay = bond->params.updelay;
 
 			if (slave->delay) {

From 0c3a8f8b8fabff4f3ad2dd7b95ae0e90cdd1aebb Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Tue, 25 Jul 2017 11:36:25 -0700
Subject: [PATCH 34/76] netpoll: Fix device name check in netpoll_setup()

Apparently netpoll_setup() assumes that netpoll.dev_name is a pointer
when checking if the device name is set:

if (np->dev_name) {
  ...

However the field is a character array, therefore the condition always
yields true. Check instead whether the first byte of the array has a
non-zero value.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8357f164c660..912731bed7b7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -666,7 +666,7 @@ int netpoll_setup(struct netpoll *np)
 	int err;
 
 	rtnl_lock();
-	if (np->dev_name) {
+	if (np->dev_name[0]) {
 		struct net *net = current->nsproxy->net_ns;
 		ndev = __dev_get_by_name(net, np->dev_name);
 	}

From d777b2ddbecf509bc61ee4f0fe0d3b5a909d698a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 25 Jul 2017 15:16:12 -0700
Subject: [PATCH 35/76] bpf: don't zero out the info struct in
 bpf_obj_get_info_by_fd()

The buffer passed to bpf_obj_get_info_by_fd() should be initialized
to zeros.  Kernel will enforce that to guarantee we can safely extend
info structures in the future.

Making the bpf_obj_get_info_by_fd() call in libbpf perform the zeroing
is problematic, however, since some members of the info structures
may need to be initialized by the callers (for instance pointers
to buffers to which kernel is to dump translated and jited images).

Remove the zeroing and fix up the in-tree callers before any kernel
has been released with this code.

As Daniel points out this seems to be the intended operation anyway,
since commit 95b9afd3987f ("bpf: Test for bpf ID") is itself setting
the buffer pointers before calling bpf_obj_get_info_by_fd().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                      | 1 -
 tools/testing/selftests/bpf/test_progs.c | 8 ++++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 412a7c82995a..256f571f2ab5 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -314,7 +314,6 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
 	int err;
 
 	bzero(&attr, sizeof(attr));
-	bzero(info, *info_len);
 	attr.info.bpf_fd = prog_fd;
 	attr.info.info_len = *info_len;
 	attr.info.info = ptr_to_u64(info);
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5855cd3d3d45..1f7dd35551b9 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -340,6 +340,7 @@ static void test_bpf_obj_id(void)
 
 		/* Check getting prog info */
 		info_len = sizeof(struct bpf_prog_info) * 2;
+		bzero(&prog_infos[i], info_len);
 		prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
 		prog_infos[i].jited_prog_len = sizeof(jited_insns);
 		prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
@@ -369,6 +370,7 @@ static void test_bpf_obj_id(void)
 
 		/* Check getting map info */
 		info_len = sizeof(struct bpf_map_info) * 2;
+		bzero(&map_infos[i], info_len);
 		err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
 					     &info_len);
 		if (CHECK(err ||
@@ -394,7 +396,7 @@ static void test_bpf_obj_id(void)
 	nr_id_found = 0;
 	next_id = 0;
 	while (!bpf_prog_get_next_id(next_id, &next_id)) {
-		struct bpf_prog_info prog_info;
+		struct bpf_prog_info prog_info = {};
 		int prog_fd;
 
 		info_len = sizeof(prog_info);
@@ -418,6 +420,8 @@ static void test_bpf_obj_id(void)
 		nr_id_found++;
 
 		err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+		prog_infos[i].jited_prog_insns = 0;
+		prog_infos[i].xlated_prog_insns = 0;
 		CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
 		      memcmp(&prog_info, &prog_infos[i], info_len),
 		      "get-prog-info(next_id->fd)",
@@ -436,7 +440,7 @@ static void test_bpf_obj_id(void)
 	nr_id_found = 0;
 	next_id = 0;
 	while (!bpf_map_get_next_id(next_id, &next_id)) {
-		struct bpf_map_info map_info;
+		struct bpf_map_info map_info = {};
 		int map_fd;
 
 		info_len = sizeof(map_info);

From 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 26 Jul 2017 14:19:09 +0800
Subject: [PATCH 36/76] dccp: fix a memleak that dccp_ipv6 doesn't put reqsk
 properly

In dccp_v6_conn_request, after reqsk gets alloced and hashed into
ehash table, reqsk's refcnt is set 3. one is for req->rsk_timer,
one is for hlist, and the other one is for current using.

The problem is when dccp_v6_conn_request returns and finishes using
reqsk, it doesn't put reqsk. This will cause reqsk refcnt leaks and
reqsk obj never gets freed.

Jianlin found this issue when running dccp_memleak.c in a loop, the
system memory would run out.

dccp_memleak.c:
  int s1 = socket(PF_INET6, 6, IPPROTO_IP);
  bind(s1, &sa1, 0x20);
  listen(s1, 0x9);
  int s2 = socket(PF_INET6, 6, IPPROTO_IP);
  connect(s2, &sa1, 0x20);
  close(s1);
  close(s2);

This patch is to put the reqsk before dccp_v6_conn_request returns,
just as what tcp_conn_request does.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv6.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c376af5bfdfb..1b58eac8aad3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+	reqsk_put(req);
 	return 0;
 
 drop_and_free:

From b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 26 Jul 2017 14:19:46 +0800
Subject: [PATCH 37/76] dccp: fix a memleak that dccp_ipv4 doesn't put reqsk
 properly

The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk
properly" fixed reqsk refcnt leak for dccp_ipv6. The same issue
exists on dccp_ipv4.

This patch is to fix it for dccp_ipv4.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f85d901f4e3f..1b202f16531f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -631,6 +631,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+	reqsk_put(req);
 	return 0;
 
 drop_and_free:

From e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 26 Jul 2017 14:20:15 +0800
Subject: [PATCH 38/76] dccp: fix a memleak for dccp_feat_init err process

In dccp_feat_init, when ccid_get_builtin_ccids failsto alloc
memory for rx.val, it should free tx.val before returning an
error.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/feat.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 1704948e6a12..f227f002c73d 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk)
 	 * singleton values (which always leads to failure).
 	 * These settings can still (later) be overridden via sockopts.
 	 */
-	if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
-	    ccid_get_builtin_ccids(&rx.val, &rx.len))
+	if (ccid_get_builtin_ccids(&tx.val, &tx.len))
 		return -ENOBUFS;
+	if (ccid_get_builtin_ccids(&rx.val, &rx.len)) {
+		kfree(tx.val);
+		return -ENOBUFS;
+	}
 
 	if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
 	    !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))

From 6b84202c946cd3da3a8daa92c682510e9ed80321 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 26 Jul 2017 16:24:59 +0800
Subject: [PATCH 39/76] sctp: fix the check for _sctp_walk_params and
 _sctp_walk_errors

Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving
_sctp_walk_{params, errors}()") tried to fix the issue that it
may overstep the chunk end for _sctp_walk_{params, errors} with
'chunk_end > offset(length) + sizeof(length)'.

But it introduced a side effect: When processing INIT, it verifies
the chunks with 'param.v == chunk_end' after iterating all params
by sctp_walk_params(). With the check 'chunk_end > offset(length)
+ sizeof(length)', it would return when the last param is not yet
accessed. Because the last param usually is fwdtsn supported param
whose size is 4 and 'chunk_end == offset(length) + sizeof(length)'

This is a badly issue even causing sctp couldn't process 4-shakes.
Client would always get abort when connecting to server, due to
the failure of INIT chunk verification on server.

The patch is to use 'chunk_end <= offset(length) + sizeof(length)'
instead of 'chunk_end < offset(length) + sizeof(length)' for both
_sctp_walk_params and _sctp_walk_errors.

Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 980807d7506f..45fd4c6056b5 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
 
 #define _sctp_walk_params(pos, chunk, end, member)\
 for (pos.v = chunk->member;\
-     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
+     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\
       (void *)chunk + end) &&\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
      ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\
@@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
 #define _sctp_walk_errors(err, chunk_hdr, end)\
 for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
 	    sizeof(struct sctp_chunkhdr));\
-     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
+     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
       (void *)chunk_hdr + end) &&\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \

From 58f36b4526add4870476e1dc97a8467cf16aeee6 Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniels@collabora.com>
Date: Wed, 26 Jul 2017 12:24:10 +0100
Subject: [PATCH 40/76] brcmfmac: Don't grow SKB by negative size

The commit to rework the headroom check in start_xmit() now calls
pxskb_expand_head() unconditionally if the header is CoW. Unfortunately,
it does so with the delta between the extant headroom and the header
length, which may be negative if there is already sufficient headroom.

pskb_expand_head() does allow for size being 0, in which case it just
copies, so clamp the header delta to zero.

Opening Chrome (and all my tabs) on a PCIE device was enough to reliably
hit this.

Fixes: 270a6c1f65fe ("brcmfmac: rework headroom check in .start_xmit()")
Signed-off-by: Daniel Stone <daniels@collabora.com>
Cc: Arend Van Spriel <arend.vanspriel@broadcom.com>
Cc: James Hughes <james.hughes@raspberrypi.org>
Cc: Hante Meuleman <hante.meuleman@broadcom.com>
Cc: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Cc: Franky Lin <franky.lin@broadcom.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 2153e8062b4c..5cc3a07dda9e 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -214,7 +214,7 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb,
 
 	/* Make sure there's enough writeable headroom */
 	if (skb_headroom(skb) < drvr->hdrlen || skb_header_cloned(skb)) {
-		head_delta = drvr->hdrlen - skb_headroom(skb);
+		head_delta = max_t(int, drvr->hdrlen - skb_headroom(skb), 0);
 
 		brcmf_dbg(INFO, "%s: insufficient headroom (%d)\n",
 			  brcmf_ifname(ifp), head_delta);

From 5f5d03143de5e0c593da4ab18fc6393c2815e108 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Wed, 26 Jul 2017 13:09:24 +0100
Subject: [PATCH 41/76] brcmfmac: fix memleak due to calling
 brcmf_sdiod_sgtable_alloc() twice

Due to a bugfix in wireless tree and the commit mentioned below a merge
was needed which went haywire. So the submitted change resulted in the
function brcmf_sdiod_sgtable_alloc() being called twice during the probe
thus leaking the memory of the first call.

Cc: stable@vger.kernel.org # 4.6.x
Fixes: 4d7928959832 ("brcmfmac: switch to new platform data")
Reported-by: Stefan Wahren <stefan.wahren@i2se.com>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index c3ecec673eb7..f3556122c6ac 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4175,11 +4175,6 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 		goto fail;
 	}
 
-	/* allocate scatter-gather table. sg support
-	 * will be disabled upon allocation failure.
-	 */
-	brcmf_sdiod_sgtable_alloc(bus->sdiodev);
-
 	/* Query the F2 block size, set roundup accordingly */
 	bus->blocksize = bus->sdiodev->func[2]->cur_blksize;
 	bus->roundup = min(max_roundup, bus->blocksize);

From 079adf05398f12371ea91dd53fdf61ee6352bc01 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 1 Jun 2017 09:40:56 +0300
Subject: [PATCH 42/76] net/mlx5: Clean SRIOV eswitch resources upon VF
 creation failure

Upon sriov enable, eswitch is always enabled.
Currently, if enable hca failed over all VFs, we would skip eswitch
disable as part of sriov disable, which will lead to resources leak.

Fix it by disabling eswitch if it was enabled (use indication from
eswitch mode).

Fixes: 6b6adee3dad2 ('net/mlx5: SRIOV core code refactoring')
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c   | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 89bfda419efe..8b18cc9ec026 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1668,7 +1668,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 	int i;
 
 	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
-	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH ||
+	    esw->mode == SRIOV_NONE)
 		return;
 
 	esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index bcdf7779c48d..bf99d40e30b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -88,7 +88,11 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 	int vf;
 
 	if (!sriov->enabled_vfs)
+#ifdef CONFIG_MLX5_CORE_EN
+		goto disable_sriov_resources;
+#else
 		return;
+#endif
 
 	for (vf = 0; vf < sriov->num_vfs; vf++) {
 		if (!sriov->vfs_ctx[vf].enabled)
@@ -103,6 +107,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 	}
 
 #ifdef CONFIG_MLX5_CORE_EN
+disable_sriov_resources:
 	mlx5_eswitch_disable_sriov(dev->priv.eswitch);
 #endif
 

From dc798b4cc0f2a06e7ad7d522403de274b86a0a6f Mon Sep 17 00:00:00 2001
From: Aviv Heller <avivh@mellanox.com>
Date: Sun, 2 Jul 2017 19:13:43 +0300
Subject: [PATCH 43/76] net/mlx5: Consider tx_enabled in all modes on remap

The tx_enabled lag event field is used to determine whether a slave is
active.
Current logic uses this value only if the mode is active-backup.

However, LACP mode, although considered a load balancing mode, can mark
a slave as inactive in certain situations (e.g., LACP timeout).

This fix takes the tx_enabled value into account when remapping, with
no respect to the LAG mode (this should not affect the behavior in XOR
mode, since in this mode both slaves are marked as active).

Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature)
Signed-off-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 25 ++++++++-----------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index a3a836bdcfd2..f26f97fe4666 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -162,22 +162,17 @@ static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 					   u8 *port1, u8 *port2)
 {
-	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
-		if (tracker->netdev_state[0].tx_enabled) {
-			*port1 = 1;
-			*port2 = 1;
-		} else {
-			*port1 = 2;
-			*port2 = 2;
-		}
-	} else {
-		*port1 = 1;
-		*port2 = 2;
-		if (!tracker->netdev_state[0].link_up)
-			*port1 = 2;
-		else if (!tracker->netdev_state[1].link_up)
-			*port2 = 1;
+	*port1 = 1;
+	*port2 = 2;
+	if (!tracker->netdev_state[0].tx_enabled ||
+	    !tracker->netdev_state[0].link_up) {
+		*port1 = 2;
+		return;
 	}
+
+	if (!tracker->netdev_state[1].tx_enabled ||
+	    !tracker->netdev_state[1].link_up)
+		*port2 = 1;
 }
 
 static void mlx5_activate_lag(struct mlx5_lag *ldev,

From 061870800efb4e3d1ad4082a2569363629bdfcfc Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 6 Jul 2017 15:48:40 +0300
Subject: [PATCH 44/76] net/mlx5: Fix command completion after timeout access
 invalid structure

Completion on timeout should not free the driver command entry structure
as it will need to access it again once real completion event from FW
will occur.

Fixes: 73dd3a4839c1 ('net/mlx5: Avoid using pending command interface slots')
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index f5a2c605749f..25fd32fcdf79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -967,7 +967,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 
 	err = wait_func(dev, ent);
 	if (err == -ETIMEDOUT)
-		goto out_free;
+		goto out;
 
 	ds = ent->ts2 - ent->ts1;
 	op = MLX5_GET(mbox_in, in->first.data, opcode);
@@ -1430,6 +1430,7 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 					mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
 						      ent->idx);
 					free_ent(cmd, ent->idx);
+					free_cmd(ent);
 				}
 				continue;
 			}
@@ -1488,7 +1489,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 				free_msg(dev, ent->in);
 
 				err = err ? err : ent->status;
-				free_cmd(ent);
+				if (!forced)
+					free_cmd(ent);
 				callback(err, context);
 			} else {
 				complete(&ent->done);

From 219c81f7d1d5a89656cb3b53d3b4e11e93608d80 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Sun, 25 Jun 2017 18:45:32 +0300
Subject: [PATCH 45/76] net/mlx5: Fix command bad flow on command entry
 allocation failure

When driver fail to allocate an entry to send command to FW, it must
notify the calling function and release the memory allocated for
this command.

Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 25fd32fcdf79..31cbe5e86a01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -786,6 +786,10 @@ static void cb_timeout_handler(struct work_struct *work)
 	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 }
 
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+			      struct mlx5_cmd_msg *msg);
+
 static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -796,17 +800,28 @@ static void cmd_work_handler(struct work_struct *work)
 	struct semaphore *sem;
 	unsigned long flags;
 	bool poll_cmd = ent->polling;
+	int alloc_ret;
 
 
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
 	if (!ent->page_queue) {
-		ent->idx = alloc_ent(cmd);
-		if (ent->idx < 0) {
+		alloc_ret = alloc_ent(cmd);
+		if (alloc_ret < 0) {
 			mlx5_core_err(dev, "failed to allocate command entry\n");
+			if (ent->callback) {
+				ent->callback(-EAGAIN, ent->context);
+				mlx5_free_cmd_msg(dev, ent->out);
+				free_msg(dev, ent->in);
+				free_cmd(ent);
+			} else {
+				ent->ret = -EAGAIN;
+				complete(&ent->done);
+			}
 			up(sem);
 			return;
 		}
+		ent->idx = alloc_ret;
 	} else {
 		ent->idx = cmd->max_reg_cmds;
 		spin_lock_irqsave(&cmd->alloc_lock, flags);

From 58569ef8f619761548e7d198f59e8ebe3af91d04 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 6 Jul 2017 15:40:32 +0300
Subject: [PATCH 46/76] net/mlx5e: IPoIB, Modify add/remove underlay QPN flows

On interface remove, the clean-up was done incorrectly causing
an error in the log:
"SET_FLOW_TABLE_ROOT(0x92f) op_mod(0x0) failed...syndrome (0x7e9f14)"

This was caused by the following flow:
-ndo_uninit:
 Move QP state to RST (this disconnects the QP from FT),
 the QP cannot be attached to any FT unless it is in RTS.

-mlx5_rdma_netdev_free:
 cleanup_rx: Destroy FT
 cleanup_tx: Destroy QP and remove QPN from FT

This caused a problem when destroying current FT we tried to
re-attach the QP to the next FT which is not needed.

The correct flow is:
-mlx5_rdma_netdev_free:
	cleanup_rx: remove QPN from FT & Destroy FT
	cleanup_tx: Destroy QP

Fixes: 508541146af1 ("net/mlx5: Use underlay QPN from the root name space")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c    | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 1ee5bce85901..85298051a3e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -178,8 +178,6 @@ out:
 
 static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
 {
-	mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn);
-
 	mlx5_core_destroy_qp(mdev, qp);
 }
 
@@ -194,8 +192,6 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
 		return err;
 	}
 
-	mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
-
 	err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
@@ -253,6 +249,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
+	struct mlx5i_priv *ipriv  = priv->ppriv;
 	int err;
 
 	err = mlx5e_create_indirect_rqt(priv);
@@ -271,12 +268,18 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_destroy_indirect_tirs;
 
-	err = mlx5i_create_flow_steering(priv);
+	err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
 	if (err)
 		goto err_destroy_direct_tirs;
 
+	err = mlx5i_create_flow_steering(priv);
+	if (err)
+		goto err_remove_rx_underlay_qpn;
+
 	return 0;
 
+err_remove_rx_underlay_qpn:
+	mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
 err_destroy_direct_tirs:
 	mlx5e_destroy_direct_tirs(priv);
 err_destroy_indirect_tirs:
@@ -290,6 +293,9 @@ err_destroy_indirect_rqts:
 
 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
 {
+	struct mlx5i_priv *ipriv  = priv->ppriv;
+
+	mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
 	mlx5i_destroy_flow_steering(priv);
 	mlx5e_destroy_direct_tirs(priv);
 	mlx5e_destroy_indirect_tirs(priv);

From 0242f4a0bb03906010bbf80495512be00494a0ef Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Wed, 5 Jul 2017 10:17:04 +0300
Subject: [PATCH 47/76] net/mlx5e: Fix outer_header_zero() check size

outer_header_zero() routine checks if the outer_headers match of a
flow-table entry are all zero.

This function uses the size of whole fte_match_param, instead of just
the outer_headers member, causing failure to detect all-zeros if
any other members of the fte_match_param are non-zero.

Use the correct size for zero check.

Fixes: 6dc6071cfcde ("net/mlx5e: Add ethtool flow steering support")
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index bdd82c9b3992..22fb993987b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -276,7 +276,7 @@ static void add_rule_to_list(struct mlx5e_priv *priv,
 
 static bool outer_header_zero(u32 *match_criteria)
 {
-	int size = MLX5_ST_SZ_BYTES(fte_match_param);
+	int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers);
 	char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
 					     outer_headers);
 

From 0b794ffae7afa7c4e5accac8791c4b78e8d080ce Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 25 May 2017 15:11:26 +0300
Subject: [PATCH 48/76] net/mlx5: Fix mlx5_ifc_mtpps_reg_bits structure size

Fix miscalculation in reserved_at_1a0 field.

Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 87869c04849a..fd98aef4545c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8175,7 +8175,7 @@ struct mlx5_ifc_mtpps_reg_bits {
 	u8         out_pulse_duration[0x10];
 	u8         out_periodic_adjustment[0x10];
 
-	u8         reserved_at_1a0[0x60];
+	u8         reserved_at_1a0[0x40];
 };
 
 struct mlx5_ifc_mtppse_reg_bits {

From fa3676885e3b5be1edfa1b2cc775e20a45b34a19 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 25 May 2017 16:09:34 +0300
Subject: [PATCH 49/76] net/mlx5e: Add field select to MTPPS register

In order to mark relevant fields while setting the MTPPS register
add field select. Otherwise it can cause a misconfiguration in
firmware.

Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_clock.c    | 29 +++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  |  2 +-
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  5 ++++
 include/linux/mlx5/mlx5_ifc.h                 | 10 +++++--
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 66f432385dbb..ab07233e2faa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -53,6 +53,15 @@ enum {
 	MLX5E_EVENT_MODE_ONCE_TILL_ARM	= 0x2,
 };
 
+enum {
+	MLX5E_MTPPS_FS_ENABLE			= BIT(0x0),
+	MLX5E_MTPPS_FS_PATTERN			= BIT(0x2),
+	MLX5E_MTPPS_FS_PIN_MODE			= BIT(0x3),
+	MLX5E_MTPPS_FS_TIME_STAMP		= BIT(0x4),
+	MLX5E_MTPPS_FS_OUT_PULSE_DURATION	= BIT(0x5),
+	MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ		= BIT(0x7),
+};
+
 void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
 			struct skb_shared_hwtstamps *hwts)
 {
@@ -221,7 +230,10 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
 
 		/* For future use need to add a loop for finding all 1PPS out pins */
 		MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
-		MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF);
+		MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta);
+		MLX5_SET(mtpps_reg, in, field_select,
+			 MLX5E_MTPPS_FS_PIN_MODE |
+			 MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ);
 
 		mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 	}
@@ -257,8 +269,7 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
 	int pin = -1;
 	int err = 0;
 
-	if (!MLX5_CAP_GEN(priv->mdev, pps) ||
-	    !MLX5_CAP_GEN(priv->mdev, pps_modify))
+	if (!MLX5_PPS_CAP(priv->mdev))
 		return -EOPNOTSUPP;
 
 	if (rq->extts.index >= tstamp->ptp_info.n_pins)
@@ -277,6 +288,9 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
 	MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN);
 	MLX5_SET(mtpps_reg, in, pattern, pattern);
 	MLX5_SET(mtpps_reg, in, enable, on);
+	MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+					      MLX5E_MTPPS_FS_PATTERN |
+					      MLX5E_MTPPS_FS_ENABLE);
 
 	err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 	if (err)
@@ -302,7 +316,7 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	int pin = -1;
 	s64 ns;
 
-	if (!MLX5_CAP_GEN(priv->mdev, pps_modify))
+	if (!MLX5_PPS_CAP(priv->mdev))
 		return -EOPNOTSUPP;
 
 	if (rq->perout.index >= tstamp->ptp_info.n_pins)
@@ -337,7 +351,10 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC);
 	MLX5_SET(mtpps_reg, in, enable, on);
 	MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
-
+	MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+					      MLX5E_MTPPS_FS_PATTERN |
+					      MLX5E_MTPPS_FS_ENABLE |
+					      MLX5E_MTPPS_FS_TIME_STAMP);
 	return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 }
 
@@ -487,7 +504,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 #define MAX_PIN_NUM	8
 	tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL);
 	if (tstamp->pps_pin_caps) {
-		if (MLX5_CAP_GEN(priv->mdev, pps))
+		if (MLX5_PPS_CAP(priv->mdev))
 			mlx5e_get_pps_caps(priv, tstamp);
 		if (tstamp->ptp_info.n_pins)
 			mlx5e_init_pin_config(tstamp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index af51a5d2b912..52b9a64cd3a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -698,7 +698,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	else
 		mlx5_core_dbg(dev, "port_module_event is not set\n");
 
-	if (MLX5_CAP_GEN(dev, pps))
+	if (MLX5_PPS_CAP(dev))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
 
 	if (MLX5_CAP_GEN(dev, fpga))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 6a3d6bef7dd4..6a263e8d883a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -154,6 +154,11 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
 int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
 int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
 
+#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) &&		\
+			    MLX5_CAP_GEN((mdev), pps_modify) &&		\
+			    MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) &&	\
+			    MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
+
 int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw);
 
 void mlx5e_init(void);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fd98aef4545c..3030121b4746 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7749,8 +7749,10 @@ struct mlx5_ifc_pcam_reg_bits {
 };
 
 struct mlx5_ifc_mcam_enhanced_features_bits {
-	u8         reserved_at_0[0x7f];
+	u8         reserved_at_0[0x7d];
 
+	u8         mtpps_enh_out_per_adj[0x1];
+	u8         mtpps_fs[0x1];
 	u8         pcie_performance_group[0x1];
 };
 
@@ -8159,7 +8161,8 @@ struct mlx5_ifc_mtpps_reg_bits {
 	u8         reserved_at_78[0x4];
 	u8         cap_pin_4_mode[0x4];
 
-	u8         reserved_at_80[0x80];
+	u8         field_select[0x20];
+	u8         reserved_at_a0[0x60];
 
 	u8         enable[0x1];
 	u8         reserved_at_101[0xb];
@@ -8174,8 +8177,9 @@ struct mlx5_ifc_mtpps_reg_bits {
 
 	u8         out_pulse_duration[0x10];
 	u8         out_periodic_adjustment[0x10];
+	u8         enhanced_out_periodic_adjustment[0x20];
 
-	u8         reserved_at_1a0[0x40];
+	u8         reserved_at_1c0[0x20];
 };
 
 struct mlx5_ifc_mtppse_reg_bits {

From 49c5031ca6f0628ef973a11b17e463e088bf859e Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Sun, 28 May 2017 12:01:38 +0300
Subject: [PATCH 50/76] net/mlx5e: Fix broken disable 1PPS flow

Need to disable the MTPPS and unsubscribe from the pulse events
when user disables the 1PPS functionality.

Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_clock.c    | 75 ++++++++++++-------
 1 file changed, 46 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index ab07233e2faa..25d43767c888 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -265,6 +265,8 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
 	struct mlx5e_priv *priv =
 		container_of(tstamp, struct mlx5e_priv, tstamp);
 	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	u32 field_select = 0;
+	u8 pin_mode = 0;
 	u8 pattern = 0;
 	int pin = -1;
 	int err = 0;
@@ -279,18 +281,21 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
 		pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index);
 		if (pin < 0)
 			return -EBUSY;
+		pin_mode = MLX5E_PIN_MODE_IN;
+		pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
+		field_select = MLX5E_MTPPS_FS_PIN_MODE |
+			       MLX5E_MTPPS_FS_PATTERN |
+			       MLX5E_MTPPS_FS_ENABLE;
+	} else {
+		pin = rq->extts.index;
+		field_select = MLX5E_MTPPS_FS_ENABLE;
 	}
 
-	if (rq->extts.flags & PTP_FALLING_EDGE)
-		pattern = 1;
-
 	MLX5_SET(mtpps_reg, in, pin, pin);
-	MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN);
+	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
 	MLX5_SET(mtpps_reg, in, pattern, pattern);
 	MLX5_SET(mtpps_reg, in, enable, on);
-	MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
-					      MLX5E_MTPPS_FS_PATTERN |
-					      MLX5E_MTPPS_FS_ENABLE);
+	MLX5_SET(mtpps_reg, in, field_select, field_select);
 
 	err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 	if (err)
@@ -313,6 +318,9 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	u64 cycles_now, cycles_delta;
 	struct timespec64 ts;
 	unsigned long flags;
+	u32 field_select = 0;
+	u8 pin_mode = 0;
+	u8 pattern = 0;
 	int pin = -1;
 	s64 ns;
 
@@ -327,34 +335,43 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 				   rq->perout.index);
 		if (pin < 0)
 			return -EBUSY;
-	}
 
-	ts.tv_sec = rq->perout.period.sec;
-	ts.tv_nsec = rq->perout.period.nsec;
-	ns = timespec64_to_ns(&ts);
-	if (on)
+		pin_mode = MLX5E_PIN_MODE_OUT;
+		pattern = MLX5E_OUT_PATTERN_PERIODIC;
+		ts.tv_sec = rq->perout.period.sec;
+		ts.tv_nsec = rq->perout.period.nsec;
+		ns = timespec64_to_ns(&ts);
+
 		if ((ns >> 1) != 500000000LL)
 			return -EINVAL;
-	ts.tv_sec = rq->perout.start.sec;
-	ts.tv_nsec = rq->perout.start.nsec;
-	ns = timespec64_to_ns(&ts);
-	cycles_now = mlx5_read_internal_timer(tstamp->mdev);
-	write_lock_irqsave(&tstamp->lock, flags);
-	nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
-	nsec_delta = ns - nsec_now;
-	cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
-				 tstamp->cycles.mult);
-	write_unlock_irqrestore(&tstamp->lock, flags);
-	time_stamp = cycles_now + cycles_delta;
+
+		ts.tv_sec = rq->perout.start.sec;
+		ts.tv_nsec = rq->perout.start.nsec;
+		ns = timespec64_to_ns(&ts);
+		cycles_now = mlx5_read_internal_timer(tstamp->mdev);
+		write_lock_irqsave(&tstamp->lock, flags);
+		nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
+		nsec_delta = ns - nsec_now;
+		cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
+					 tstamp->cycles.mult);
+		write_unlock_irqrestore(&tstamp->lock, flags);
+		time_stamp = cycles_now + cycles_delta;
+		field_select = MLX5E_MTPPS_FS_PIN_MODE |
+			       MLX5E_MTPPS_FS_PATTERN |
+			       MLX5E_MTPPS_FS_ENABLE |
+			       MLX5E_MTPPS_FS_TIME_STAMP;
+	} else {
+		pin = rq->perout.index;
+		field_select = MLX5E_MTPPS_FS_ENABLE;
+	}
+
 	MLX5_SET(mtpps_reg, in, pin, pin);
-	MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
-	MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC);
+	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+	MLX5_SET(mtpps_reg, in, pattern, pattern);
 	MLX5_SET(mtpps_reg, in, enable, on);
 	MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
-	MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
-					      MLX5E_MTPPS_FS_PATTERN |
-					      MLX5E_MTPPS_FS_ENABLE |
-					      MLX5E_MTPPS_FS_TIME_STAMP);
+	MLX5_SET(mtpps_reg, in, field_select, field_select);
+
 	return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 }
 

From 4272f9b88db9223216cdf87314f570f6d81295b4 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Sun, 28 May 2017 14:06:01 +0300
Subject: [PATCH 51/76] net/mlx5e: Change 1PPS out scheme

In order to fix the drift in 1PPS out need to adjust the next pulse.
On each 1PPS out falling edge driver gets the event, then the event
handler adjusts the next pulse starting time.

Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   9 +-
 .../ethernet/mellanox/mlx5/core/en_clock.c    | 116 ++++++++++++------
 2 files changed, 87 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e1b7ddfecd01..d42826f0f0d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -266,6 +266,13 @@ struct mlx5e_dcbx {
 };
 #endif
 
+#define MAX_PIN_NUM	8
+struct mlx5e_pps {
+	u8                         pin_caps[MAX_PIN_NUM];
+	struct work_struct         out_work;
+	u64                        start[MAX_PIN_NUM];
+};
+
 struct mlx5e_tstamp {
 	rwlock_t                   lock;
 	struct cyclecounter        cycles;
@@ -277,7 +284,7 @@ struct mlx5e_tstamp {
 	struct mlx5_core_dev      *mdev;
 	struct ptp_clock          *ptp;
 	struct ptp_clock_info      ptp_info;
-	u8                        *pps_pin_caps;
+	struct mlx5e_pps           pps_info;
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 25d43767c888..464ddd10ebbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -82,6 +82,33 @@ static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc)
 	return mlx5_read_internal_timer(tstamp->mdev) & cc->mask;
 }
 
+static void mlx5e_pps_out(struct work_struct *work)
+{
+	struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps,
+						  out_work);
+	struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp,
+						   pps_info);
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
+		u64 tstart;
+
+		write_lock_irqsave(&tstamp->lock, flags);
+		tstart = tstamp->pps_info.start[i];
+		tstamp->pps_info.start[i] = 0;
+		write_unlock_irqrestore(&tstamp->lock, flags);
+		if (!tstart)
+			continue;
+
+		MLX5_SET(mtpps_reg, in, pin, i);
+		MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
+		MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP);
+		mlx5_set_mtpps(tstamp->mdev, in, sizeof(in));
+	}
+}
+
 static void mlx5e_timestamp_overflow(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
@@ -222,21 +249,6 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
 	int neg_adj = 0;
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						  ptp_info);
-	struct mlx5e_priv *priv =
-		container_of(tstamp, struct mlx5e_priv, tstamp);
-
-	if (MLX5_CAP_GEN(priv->mdev, pps_modify)) {
-		u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
-
-		/* For future use need to add a loop for finding all 1PPS out pins */
-		MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
-		MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta);
-		MLX5_SET(mtpps_reg, in, field_select,
-			 MLX5E_MTPPS_FS_PIN_MODE |
-			 MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ);
-
-		mlx5_set_mtpps(priv->mdev, in, sizeof(in));
-	}
 
 	if (delta < 0) {
 		neg_adj = 1;
@@ -314,7 +326,7 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	struct mlx5e_priv *priv =
 		container_of(tstamp, struct mlx5e_priv, tstamp);
 	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
-	u64 nsec_now, nsec_delta, time_stamp;
+	u64 nsec_now, nsec_delta, time_stamp = 0;
 	u64 cycles_now, cycles_delta;
 	struct timespec64 ts;
 	unsigned long flags;
@@ -322,6 +334,7 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	u8 pin_mode = 0;
 	u8 pattern = 0;
 	int pin = -1;
+	int err = 0;
 	s64 ns;
 
 	if (!MLX5_PPS_CAP(priv->mdev))
@@ -372,7 +385,12 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
 	MLX5_SET(mtpps_reg, in, field_select, field_select);
 
-	return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+	err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+	if (err)
+		return err;
+
+	return mlx5_set_mtppse(priv->mdev, pin, 0,
+			       MLX5E_EVENT_MODE_REPETETIVE & on);
 }
 
 static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
@@ -456,22 +474,50 @@ static void mlx5e_get_pps_caps(struct mlx5e_priv *priv,
 	tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
 					      cap_max_num_of_pps_out_pins);
 
-	tstamp->pps_pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
-	tstamp->pps_pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
-	tstamp->pps_pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
-	tstamp->pps_pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
-	tstamp->pps_pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
-	tstamp->pps_pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
-	tstamp->pps_pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
-	tstamp->pps_pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+	tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+	tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+	tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+	tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+	tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+	tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+	tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+	tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
 }
 
 void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
 			     struct ptp_clock_event *event)
 {
+	struct net_device *netdev = priv->netdev;
 	struct mlx5e_tstamp *tstamp = &priv->tstamp;
+	struct timespec64 ts;
+	u64 nsec_now, nsec_delta;
+	u64 cycles_now, cycles_delta;
+	int pin = event->index;
+	s64 ns;
+	unsigned long flags;
 
-	ptp_clock_event(tstamp->ptp, event);
+	switch (tstamp->ptp_info.pin_config[pin].func) {
+	case PTP_PF_EXTTS:
+		ptp_clock_event(tstamp->ptp, event);
+		break;
+	case PTP_PF_PEROUT:
+		mlx5e_ptp_gettime(&tstamp->ptp_info, &ts);
+		cycles_now = mlx5_read_internal_timer(tstamp->mdev);
+		ts.tv_sec += 1;
+		ts.tv_nsec = 0;
+		ns = timespec64_to_ns(&ts);
+		write_lock_irqsave(&tstamp->lock, flags);
+		nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
+		nsec_delta = ns - nsec_now;
+		cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
+					 tstamp->cycles.mult);
+		tstamp->pps_info.start[pin] = cycles_now + cycles_delta;
+		queue_work(priv->wq, &tstamp->pps_info.out_work);
+		write_unlock_irqrestore(&tstamp->lock, flags);
+		break;
+	default:
+		netdev_err(netdev, "%s: Unhandled event\n", __func__);
+	}
 }
 
 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
@@ -507,6 +553,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 	do_div(ns, NSEC_PER_SEC / 2 / HZ);
 	tstamp->overflow_period = ns;
 
+	INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
 	INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
 	if (tstamp->overflow_period)
 		schedule_delayed_work(&tstamp->overflow_work, 0);
@@ -518,16 +565,10 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 	snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp");
 
 	/* Initialize 1PPS data structures */
-#define MAX_PIN_NUM	8
-	tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL);
-	if (tstamp->pps_pin_caps) {
-		if (MLX5_PPS_CAP(priv->mdev))
-			mlx5e_get_pps_caps(priv, tstamp);
-		if (tstamp->ptp_info.n_pins)
-			mlx5e_init_pin_config(tstamp);
-	} else {
-		mlx5_core_warn(priv->mdev, "1PPS initialization failed\n");
-	}
+	if (MLX5_PPS_CAP(priv->mdev))
+		mlx5e_get_pps_caps(priv, tstamp);
+	if (tstamp->ptp_info.n_pins)
+		mlx5e_init_pin_config(tstamp);
 
 	tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
 					 &priv->mdev->pdev->dev);
@@ -550,7 +591,8 @@ void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv)
 		priv->tstamp.ptp = NULL;
 	}
 
-	kfree(tstamp->pps_pin_caps);
+	cancel_work_sync(&tstamp->pps_info.out_work);
+
 	kfree(tstamp->ptp_info.pin_config);
 
 	cancel_delayed_work_sync(&tstamp->overflow_work);

From cf5033089b078303b102b65e3ccbbfa3ce0f4367 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Sun, 28 May 2017 14:27:02 +0300
Subject: [PATCH 52/76] net/mlx5e: Add missing support for PTP_CLK_REQ_PPS
 request

Add the missing option to enable the PTP_CLK_PPS function.
In this case pin should be configured as 1PPS IN first and
then it will be connected to PPS mechanism.
Events will be reported as PTP_CLOCK_PPSUSR events to relevant sysfs.

Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../ethernet/mellanox/mlx5/core/en_clock.c    | 20 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  1 -
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d42826f0f0d6..0039b4725405 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -271,6 +271,7 @@ struct mlx5e_pps {
 	u8                         pin_caps[MAX_PIN_NUM];
 	struct work_struct         out_work;
 	u64                        start[MAX_PIN_NUM];
+	u8                         enabled;
 };
 
 struct mlx5e_tstamp {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 464ddd10ebbc..ba355c6b6e1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -393,6 +393,17 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 			       MLX5E_EVENT_MODE_REPETETIVE & on);
 }
 
+static int mlx5e_pps_configure(struct ptp_clock_info *ptp,
+			       struct ptp_clock_request *rq,
+			       int on)
+{
+	struct mlx5e_tstamp *tstamp =
+		container_of(ptp, struct mlx5e_tstamp, ptp_info);
+
+	tstamp->pps_info.enabled = !!on;
+	return 0;
+}
+
 static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
 			    struct ptp_clock_request *rq,
 			    int on)
@@ -402,6 +413,8 @@ static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
 		return mlx5e_extts_configure(ptp, rq, on);
 	case PTP_CLK_REQ_PEROUT:
 		return mlx5e_perout_configure(ptp, rq, on);
+	case PTP_CLK_REQ_PPS:
+		return mlx5e_pps_configure(ptp, rq, on);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -447,6 +460,7 @@ static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp)
 		return -ENOMEM;
 	tstamp->ptp_info.enable = mlx5e_ptp_enable;
 	tstamp->ptp_info.verify = mlx5e_ptp_verify;
+	tstamp->ptp_info.pps = 1;
 
 	for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
 		snprintf(tstamp->ptp_info.pin_config[i].name,
@@ -498,6 +512,12 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
 
 	switch (tstamp->ptp_info.pin_config[pin].func) {
 	case PTP_PF_EXTTS:
+		if (tstamp->pps_info.enabled) {
+			event->type = PTP_CLOCK_PPSUSR;
+			event->pps_times.ts_real = ns_to_timespec64(event->timestamp);
+		} else {
+			event->type = PTP_CLOCK_EXTTS;
+		}
 		ptp_clock_event(tstamp->ptp, event);
 		break;
 	case PTP_PF_PEROUT:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1eac5003084f..57f31fa478ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -377,7 +377,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 		break;
 	case MLX5_DEV_EVENT_PPS:
 		eqe = (struct mlx5_eqe *)param;
-		ptp_event.type = PTP_CLOCK_EXTTS;
 		ptp_event.index = eqe->data.pps.pin;
 		ptp_event.timestamp =
 			timecounter_cyc2time(&priv->tstamp.clock,

From d439c84509a510e864fdc6166c760482cd03fc57 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Wed, 12 Jul 2017 17:27:18 +0300
Subject: [PATCH 53/76] net/mlx5e: Fix wrong delay calculation for overflow
 check scheduling

The overflow_period is calculated in seconds. In order to use it
for delayed work scheduling translation to jiffies is needed.

Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index ba355c6b6e1f..cde8e8e772ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -119,7 +119,8 @@ static void mlx5e_timestamp_overflow(struct work_struct *work)
 	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_read(&tstamp->clock);
 	write_unlock_irqrestore(&tstamp->lock, flags);
-	schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period);
+	schedule_delayed_work(&tstamp->overflow_work,
+			      msecs_to_jiffies(tstamp->overflow_period * 1000));
 }
 
 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)

From f08c39ed0bfb503c7b3e013cd40d036ce6a0941a Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Wed, 12 Jul 2017 17:44:07 +0300
Subject: [PATCH 54/76] net/mlx5e: Schedule overflow check work to mlx5e
 workqueue

This is done in order to ensure that work will not run after the cleanup.

Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index cde8e8e772ec..84dd63e74041 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -114,13 +114,14 @@ static void mlx5e_timestamp_overflow(struct work_struct *work)
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
 						   overflow_work);
+	struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp);
 	unsigned long flags;
 
 	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_read(&tstamp->clock);
 	write_unlock_irqrestore(&tstamp->lock, flags);
-	schedule_delayed_work(&tstamp->overflow_work,
-			      msecs_to_jiffies(tstamp->overflow_period * 1000));
+	queue_delayed_work(priv->wq, &tstamp->overflow_work,
+			   msecs_to_jiffies(tstamp->overflow_period * 1000));
 }
 
 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
@@ -577,7 +578,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 	INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
 	INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
 	if (tstamp->overflow_period)
-		schedule_delayed_work(&tstamp->overflow_work, 0);
+		queue_delayed_work(priv->wq, &tstamp->overflow_work, 0);
 	else
 		mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n");
 
@@ -613,8 +614,6 @@ void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv)
 	}
 
 	cancel_work_sync(&tstamp->pps_info.out_work);
-
-	kfree(tstamp->ptp_info.pin_config);
-
 	cancel_delayed_work_sync(&tstamp->overflow_work);
+	kfree(tstamp->ptp_info.pin_config);
 }

From bcec601f30fb41e9233674942fa4040a6e63657a Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Thu, 6 Jul 2017 16:40:34 +0300
Subject: [PATCH 55/76] net/mlx5: Fix mlx5_add_flow_rules call with correct num
 of dests

When adding ethtool steering rule with action DISCARD we wrongly
pass a NULL dest with dest_num 1 to mlx5_add_flow_rules().
What this error seems to have caused is sending VPORT 0
(MLX5_FLOW_DESTINATION_TYPE_VPORT) as the fte dest instead of no dests.
We have fte action correctly set to DROP so it might been ignored
anyways.

To reproduce use:
 # sudo ethtool --config-nfc <dev> flow-type ether \
   dst aa:bb:cc:dd:ee:ff action -1

Fixes: 74491de93712 ("net/mlx5: Add multi dest support")
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 22fb993987b4..eafc59280ada 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -320,7 +320,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
 
 	spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
 	flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
-	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",

From 4c3464a8c31829e6e7efbf8258dc062f761509d7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 26 Jul 2017 17:13:59 +0200
Subject: [PATCH 56/76] net: phy: rework Kconfig settings for MDIO_BUS

I still see build errors in randconfig builds and have had this
patch for a while to locally work around it:

drivers/built-in.o: In function `xgene_mdio_probe':
mux-core.c:(.text+0x352154): undefined reference to `of_mdiobus_register'
mux-core.c:(.text+0x352168): undefined reference to `mdiobus_free'
mux-core.c:(.text+0x3521c0): undefined reference to `mdiobus_alloc_size'

The idea is that CONFIG_MDIO_BUS now reflects whether the mdio_bus
code is built-in or a module, and other drivers that use the core
code can simply depend on that, instead of having a complex
dependency line.

Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 2dda72004a7d..928fd892f167 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -7,7 +7,16 @@ menuconfig MDIO_DEVICE
 	help
 	   MDIO devices and driver infrastructure code.
 
-if MDIO_DEVICE
+config MDIO_BUS
+	tristate
+	default m if PHYLIB=m
+	default MDIO_DEVICE
+	help
+	  This internal symbol is used for link time dependencies and it
+	  reflects whether the mdio_bus/mdio_device code is built as a
+	  loadable module or built-in.
+
+if MDIO_BUS
 
 config MDIO_BCM_IPROC
 	tristate "Broadcom iProc MDIO bus controller"
@@ -28,7 +37,6 @@ config MDIO_BCM_UNIMAC
 
 config MDIO_BITBANG
 	tristate "Bitbanged MDIO buses"
-	depends on !(MDIO_DEVICE=y && PHYLIB=m)
 	help
 	  This module implements the MDIO bus protocol in software,
 	  for use by low level drivers that export the ability to
@@ -127,7 +135,6 @@ config MDIO_THUNDER
 	tristate "ThunderX SOCs MDIO buses"
 	depends on 64BIT
 	depends on PCI
-	depends on !(MDIO_DEVICE=y && PHYLIB=m)
 	select MDIO_CAVIUM
 	help
 	  This driver supports the MDIO interfaces found on Cavium

From 245db3c349e0b413e0fec8d6179f6b767649ad63 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 26 Jul 2017 17:14:00 +0200
Subject: [PATCH 57/76] phy: bcm-ns-usb3: fix MDIO_BUS dependency

The driver attempts to 'select MDIO_DEVICE', but the code
is actually a loadable module when PHYLIB=m:

drivers/phy/broadcom/phy-bcm-ns-usb3.o: In function `bcm_ns_usb3_mdiodev_phy_write':
phy-bcm-ns-usb3.c:(.text.bcm_ns_usb3_mdiodev_phy_write+0x28): undefined reference to `mdiobus_write'
drivers/phy/broadcom/phy-bcm-ns-usb3.o: In function `bcm_ns_usb3_module_exit':
phy-bcm-ns-usb3.c:(.exit.text+0x18): undefined reference to `mdio_driver_unregister'
drivers/phy/broadcom/phy-bcm-ns-usb3.o: In function `bcm_ns_usb3_module_init':
phy-bcm-ns-usb3.c:(.init.text+0x18): undefined reference to `mdio_driver_register'
phy-bcm-ns-usb3.c:(.init.text+0x38): undefined reference to `mdio_driver_unregister'

Using 'depends on MDIO_BUS' instead will avoid the link error.

Fixes: af850e14a7ae ("phy: bcm-ns-usb3: add MDIO driver using proper bus layer")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/phy/broadcom/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig
index 37371b89b14f..64fc59c3ae6d 100644
--- a/drivers/phy/broadcom/Kconfig
+++ b/drivers/phy/broadcom/Kconfig
@@ -30,8 +30,8 @@ config PHY_BCM_NS_USB3
 	tristate "Broadcom Northstar USB 3.0 PHY Driver"
 	depends on ARCH_BCM_IPROC || COMPILE_TEST
 	depends on HAS_IOMEM && OF
+	depends on MDIO_BUS
 	select GENERIC_PHY
-	select MDIO_DEVICE
 	help
 	  Enable this to support Broadcom USB 3.0 PHY connected to the USB
 	  controller on Northstar family.

From 0254e0c632bfe4a0cf610e2d90397474144f00d2 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 26 Jul 2017 15:22:06 -0700
Subject: [PATCH 58/76] net: check dev->addr_len for dev_set_mac_address()

Historically, dev_ifsioc() uses struct sockaddr as mac
address definition, this is why dev_set_mac_address()
accepts a struct sockaddr pointer as input but now we
have various types of mac addresse whose lengths
are up to MAX_ADDR_LEN, longer than struct sockaddr,
and saved in dev->addr_len.

It is too late to fix dev_ifsioc() due to API
compatibility, so just reject those larger than
sizeof(struct sockaddr), otherwise we would read
and use some random bytes from kernel stack.

Fortunately, only a few IPv6 tunnel devices have addr_len
larger than sizeof(struct sockaddr) and they don't support
ndo_set_mac_addr(). But with team driver, in lb mode, they
can still be enslaved to a team master and make its mac addr
length as the same.

Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 06b147d7d9e2..709a4e6fb447 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -263,6 +263,8 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 		return dev_set_mtu(dev, ifr->ifr_mtu);
 
 	case SIOCSIFHWADDR:
+		if (dev->addr_len > sizeof(struct sockaddr))
+			return -EINVAL;
 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
 
 	case SIOCSIFHWBROADCAST:

From 996f6e12cfb33ff6c3e39b4511bb3b8bc564b342 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 26 Jul 2017 15:22:07 -0700
Subject: [PATCH 59/76] team: use a larger struct for mac address

IPv6 tunnels use sizeof(struct in6_addr) as dev->addr_len,
but in many places especially bonding, we use struct sockaddr
to copy and set mac addr, this could lead to stack out-of-bounds
access.

Fix it by using a larger address storage like bonding.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 464570409796..ae53e899259f 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -60,11 +60,11 @@ static struct team_port *team_port_get_rtnl(const struct net_device *dev)
 static int __set_port_dev_addr(struct net_device *port_dev,
 			       const unsigned char *dev_addr)
 {
-	struct sockaddr addr;
+	struct sockaddr_storage addr;
 
-	memcpy(addr.sa_data, dev_addr, port_dev->addr_len);
-	addr.sa_family = port_dev->type;
-	return dev_set_mac_address(port_dev, &addr);
+	memcpy(addr.__data, dev_addr, port_dev->addr_len);
+	addr.ss_family = port_dev->type;
+	return dev_set_mac_address(port_dev, (struct sockaddr *)&addr);
 }
 
 static int team_port_set_orig_dev_addr(struct team_port *port)

From 8d65843c44269c21e95c98090d9bb4848d473853 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 27 Jul 2017 11:22:05 +0800
Subject: [PATCH 60/76] Revert "vhost: cache used event for better performance"

This reverts commit 809ecb9bca6a9424ccd392d67e368160f8b76c92. Since it
was reported to break vhost_net. We want to cache used event and use
it to check for notification. The assumption was that guest won't move
the event idx back, but this could happen in fact when 16 bit index
wraps around after 64K entries.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vhost.c | 28 ++++++----------------------
 drivers/vhost/vhost.h |  3 ---
 2 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e4613a3c362d..9cb3f722dce1 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -308,7 +308,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->avail = NULL;
 	vq->used = NULL;
 	vq->last_avail_idx = 0;
-	vq->last_used_event = 0;
 	vq->avail_idx = 0;
 	vq->last_used_idx = 0;
 	vq->signalled_used = 0;
@@ -1402,7 +1401,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
 			r = -EINVAL;
 			break;
 		}
-		vq->last_avail_idx = vq->last_used_event = s.num;
+		vq->last_avail_idx = s.num;
 		/* Forget the cached index value. */
 		vq->avail_idx = vq->last_avail_idx;
 		break;
@@ -2241,6 +2240,10 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	__u16 old, new;
 	__virtio16 event;
 	bool v;
+	/* Flush out used index updates. This is paired
+	 * with the barrier that the Guest executes when enabling
+	 * interrupts. */
+	smp_mb();
 
 	if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
 	    unlikely(vq->avail_idx == vq->last_avail_idx))
@@ -2248,10 +2251,6 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 
 	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		__virtio16 flags;
-		/* Flush out used index updates. This is paired
-		 * with the barrier that the Guest executes when enabling
-		 * interrupts. */
-		smp_mb();
 		if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
 			vq_err(vq, "Failed to get flags");
 			return true;
@@ -2266,26 +2265,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	if (unlikely(!v))
 		return true;
 
-	/* We're sure if the following conditions are met, there's no
-	 * need to notify guest:
-	 * 1) cached used event is ahead of new
-	 * 2) old to new updating does not cross cached used event. */
-	if (vring_need_event(vq->last_used_event, new + vq->num, new) &&
-	    !vring_need_event(vq->last_used_event, new, old))
-		return false;
-
-	/* Flush out used index updates. This is paired
-	 * with the barrier that the Guest executes when enabling
-	 * interrupts. */
-	smp_mb();
-
 	if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
 		vq_err(vq, "Failed to get used event idx");
 		return true;
 	}
-	vq->last_used_event = vhost16_to_cpu(vq, event);
-
-	return vring_need_event(vq->last_used_event, new, old);
+	return vring_need_event(vhost16_to_cpu(vq, event), new, old);
 }
 
 /* This actually signals the guest, using eventfd. */
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index f72095868b93..bb7c29b8b9fc 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -115,9 +115,6 @@ struct vhost_virtqueue {
 	/* Last index we used. */
 	u16 last_used_idx;
 
-	/* Last used evet we've seen */
-	u16 last_used_event;
-
 	/* Used flags */
 	u16 used_flags;
 

From 500268e9f28de972514de89a5a1b4106d0417989 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Thu, 27 Jul 2017 12:53:04 +0530
Subject: [PATCH 61/76] net: thunderx: Fix BGX transmit stall due to underflow

For SGMII/RGMII/QSGMII interfaces when physical link goes down
while traffic is high is resulting in underflow condition being set
on that specific BGX's LMAC. Which assets a backpresure and VNIC stops
transmitting packets.

This is due to BGX being disabled in link status change callback while
packet is in transit. This patch fixes this issue by not disabling BGX
but instead just disables packet Rx and Tx.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/thunder/thunder_bgx.c | 27 +++++++++++++++----
 .../net/ethernet/cavium/thunder/thunder_bgx.h |  2 ++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 79112563a25a..5e5c4d7796b8 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -292,11 +292,30 @@ static void bgx_sgmii_change_link_state(struct lmac *lmac)
 	u64 cmr_cfg;
 	u64 port_cfg = 0;
 	u64 misc_ctl = 0;
+	bool tx_en, rx_en;
 
 	cmr_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG);
-	cmr_cfg &= ~CMR_EN;
+	tx_en = cmr_cfg & CMR_PKT_TX_EN;
+	rx_en = cmr_cfg & CMR_PKT_RX_EN;
+	cmr_cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
 	bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
 
+	/* Wait for BGX RX to be idle */
+	if (bgx_poll_reg(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG,
+			 GMI_PORT_CFG_RX_IDLE, false)) {
+		dev_err(&bgx->pdev->dev, "BGX%d LMAC%d GMI RX not idle\n",
+			bgx->bgx_id, lmac->lmacid);
+		return;
+	}
+
+	/* Wait for BGX TX to be idle */
+	if (bgx_poll_reg(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG,
+			 GMI_PORT_CFG_TX_IDLE, false)) {
+		dev_err(&bgx->pdev->dev, "BGX%d LMAC%d GMI TX not idle\n",
+			bgx->bgx_id, lmac->lmacid);
+		return;
+	}
+
 	port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
 	misc_ctl = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL);
 
@@ -347,10 +366,8 @@ static void bgx_sgmii_change_link_state(struct lmac *lmac)
 	bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL, misc_ctl);
 	bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG, port_cfg);
 
-	port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
-
-	/* Re-enable lmac */
-	cmr_cfg |= CMR_EN;
+	/* Restore CMR config settings */
+	cmr_cfg |= (rx_en ? CMR_PKT_RX_EN : 0) | (tx_en ? CMR_PKT_TX_EN : 0);
 	bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
 
 	if (bgx->is_rgx && (cmr_cfg & (CMR_PKT_RX_EN | CMR_PKT_TX_EN)))
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 6b7fe6fdd13b..23acdc5ab896 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -170,6 +170,8 @@
 #define  GMI_PORT_CFG_DUPLEX			BIT_ULL(2)
 #define  GMI_PORT_CFG_SLOT_TIME			BIT_ULL(3)
 #define  GMI_PORT_CFG_SPEED_MSB			BIT_ULL(8)
+#define  GMI_PORT_CFG_RX_IDLE			BIT_ULL(12)
+#define  GMI_PORT_CFG_TX_IDLE			BIT_ULL(13)
 #define BGX_GMP_GMI_RXX_JABBER		0x38038
 #define BGX_GMP_GMI_TXX_THRESH		0x38210
 #define BGX_GMP_GMI_TXX_APPEND		0x38218

From c9f2c1ae123a751d4e4f949144500219354d5ee1 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 27 Jul 2017 14:45:09 +0200
Subject: [PATCH 62/76] udp6: fix socket leak on early demux

When an early demuxed packet reaches __udp6_lib_lookup_skb(), the
sk reference is retrieved and used, but the relevant reference
count is leaked and the socket destructor is never called.
Beyond leaking the sk memory, if there are pending UDP packets
in the receive queue, even the related accounted memory is leaked.

In the long run, this will cause persistent forward allocation errors
and no UDP skbs (both ipv4 and ipv6) will be able to reach the
user-space.

Fix this by explicitly accessing the early demux reference before
the lookup, and properly decreasing the socket reference count
after usage.

Also drop the skb_steal_sock() in __udp6_lib_lookup_skb(), and
the now obsoleted comment about "socket cache".

The newly added code is derived from the current ipv4 code for the
similar path.

v1 -> v2:
  fixed the __udp6_lib_rcv() return code for resubmission,
  as suggested by Eric

Reported-by: Sam Edwards <CFSworks@gmail.com>
Reported-by: Marc Haber <mh+netdev@zugschlus.de>
Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h |  1 +
 net/ipv4/udp.c    |  3 ++-
 net/ipv6/udp.c    | 27 ++++++++++++++++++---------
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 56ce2d2a612d..cc8036987dcb 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -260,6 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
 }
 
 void udp_v4_early_demux(struct sk_buff *skb);
+void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
 int udp_get_port(struct sock *sk, unsigned short snum,
 		 int (*saddr_cmp)(const struct sock *,
 				  const struct sock *));
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fac7cb9e3b0f..e6276fa3750b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1928,7 +1928,7 @@ drop:
 /* For TCP sockets, sk_rx_dst is protected by socket lock
  * For UDP, we use xchg() to guard against concurrent changes.
  */
-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 {
 	struct dst_entry *old;
 
@@ -1937,6 +1937,7 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 		dst_release(old);
 	}
 }
+EXPORT_SYMBOL(udp_sk_rx_dst_set);
 
 /*
  *	Multicasts and broadcasts go to each listener.
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4a3e65626e8b..98fe4560e24c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 					  struct udp_table *udptable)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
-	struct sock *sk;
 
-	sk = skb_steal_sock(skb);
-	if (unlikely(sk))
-		return sk;
 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
 				 &iph->daddr, dport, inet6_iif(skb),
 				 udptable, skb);
@@ -804,6 +800,24 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (udp6_csum_init(skb, uh, proto))
 		goto csum_error;
 
+	/* Check if the socket is already available, e.g. due to early demux */
+	sk = skb_steal_sock(skb);
+	if (sk) {
+		struct dst_entry *dst = skb_dst(skb);
+		int ret;
+
+		if (unlikely(sk->sk_rx_dst != dst))
+			udp_sk_rx_dst_set(sk, dst);
+
+		ret = udpv6_queue_rcv_skb(sk, skb);
+		sock_put(sk);
+
+		/* a return value > 0 means to resubmit the input */
+		if (ret > 0)
+			return ret;
+		return 0;
+	}
+
 	/*
 	 *	Multicast receive code
 	 */
@@ -812,11 +826,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 				saddr, daddr, udptable, proto);
 
 	/* Unicast */
-
-	/*
-	 * check socket cache ... must talk to Alan about his plans
-	 * for sock caches... i'll skip this for now.
-	 */
 	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
 	if (sk) {
 		int ret;

From 89b096898a8450b0a5b97d521e000ae9f94f81f9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 27 Jul 2017 21:02:46 +0200
Subject: [PATCH 63/76] bpf: don't indicate success when copy_from_user fails

err in bpf_prog_get_info_by_fd() still holds 0 at that time from prior
check_uarg_tail_zero() check. Explicitly return -EFAULT instead, so
user space can be notified of buggy behavior.

Fixes: 1e2709769086 ("bpf: Add BPF_OBJ_GET_INFO_BY_FD")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 045646da97cc..84bb39975ad4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1289,7 +1289,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	info_len = min_t(u32, sizeof(info), info_len);
 
 	if (copy_from_user(&info, uinfo, info_len))
-		return err;
+		return -EFAULT;
 
 	info.type = prog->type;
 	info.id = prog->aux->id;

From b103ec73b27ad385241c806a21f8e2bdeae2f13a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 27 Jul 2017 23:15:09 +0100
Subject: [PATCH 64/76] net: tc35815: fix spelling mistake: "Intterrupt" ->
 "Interrupt"

Trivial fix to spelling mistake in printk message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/toshiba/tc35815.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index d9db8a06afd2..cce9c9ed46aa 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -1338,7 +1338,7 @@ static int tc35815_send_packet(struct sk_buff *skb, struct net_device *dev)
 static void tc35815_fatal_error_interrupt(struct net_device *dev, u32 status)
 {
 	static int count;
-	printk(KERN_WARNING "%s: Fatal Error Intterrupt (%#x):",
+	printk(KERN_WARNING "%s: Fatal Error Interrupt (%#x):",
 	       dev->name, status);
 	if (status & Int_IntPCI)
 		printk(" IntPCI");

From efe967cdec32af93e15839cc639695ec5f637771 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 28 Jul 2017 16:41:37 +0200
Subject: [PATCH 65/76] tcp: avoid bogus gcc-7 array-bounds warning

When using CONFIG_UBSAN_SANITIZE_ALL, the TCP code produces a
false-positive warning:

net/ipv4/tcp_output.c: In function 'tcp_connect':
net/ipv4/tcp_output.c:2207:40: error: array subscript is below array bounds [-Werror=array-bounds]
   tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
                                        ^~
net/ipv4/tcp_output.c:2207:40: error: array subscript is below array bounds [-Werror=array-bounds]
   tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~

I have opened a gcc bug for this, but distros have already shipped
compilers with this problem, and it's not clear yet whether there is
a way for gcc to avoid the warning. As the problem is related to the
bitfield access, this introduces a temporary variable to store the old
enum value.

I did not notice this warning earlier, since UBSAN is disabled when
building with COMPILE_TEST, and that was always turned on in both
allmodconfig and randconfig tests.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81601
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..2f1588bf73da 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2202,9 +2202,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
 {
 	const u32 now = tcp_jiffies32;
+	enum tcp_chrono old = tp->chrono_type;
 
-	if (tp->chrono_type > TCP_CHRONO_UNSPEC)
-		tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+	if (old > TCP_CHRONO_UNSPEC)
+		tp->chrono_stat[old - 1] += now - tp->chrono_start;
 	tp->chrono_start = now;
 	tp->chrono_type = new;
 }

From 9975a54b3c9ecf029cbf5dd7a8c9701b1d74029e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 28 Jul 2017 17:05:25 +0200
Subject: [PATCH 66/76] bpf: fix bpf_prog_get_info_by_fd to dump correct
 xlated_prog_len

bpf_prog_size(prog->len) is not the correct length we want to dump
back to user space. The code in bpf_prog_get_info_by_fd() uses this
to copy prog->insnsi to user space, but bpf_prog_size(prog->len) also
includes the size of struct bpf_prog itself plus program instructions
and is usually used either in context of accounting or for bpf_prog_alloc()
et al, thus we copy out of bounds in bpf_prog_get_info_by_fd()
potentially. Use the correct bpf_prog_insn_size() instead.

Fixes: 1e2709769086 ("bpf: Add BPF_OBJ_GET_INFO_BY_FD")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 84bb39975ad4..6c772adabad2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1312,7 +1312,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	ulen = info.xlated_prog_len;
-	info.xlated_prog_len = bpf_prog_size(prog->len);
+	info.xlated_prog_len = bpf_prog_insn_size(prog);
 	if (info.xlated_prog_len && ulen) {
 		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
 		ulen = min_t(u32, info.xlated_prog_len, ulen);

From 96a734bae00f4ea46d0f3f3a8e347c31d24489bd Mon Sep 17 00:00:00 2001
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Date: Thu, 27 Jul 2017 17:26:00 +0100
Subject: [PATCH 67/76] sunhme: fix up GREG_STAT and GREG_IMASK register
 offsets

Update the values to match those from the STP2002QFP documentation.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/sunhme.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/sun/sunhme.h b/drivers/net/ethernet/sun/sunhme.h
index 3af540adb3c5..fca1bca7f69d 100644
--- a/drivers/net/ethernet/sun/sunhme.h
+++ b/drivers/net/ethernet/sun/sunhme.h
@@ -13,9 +13,9 @@
 /* Happy Meal global registers. */
 #define GREG_SWRESET	0x000UL	/* Software Reset  */
 #define GREG_CFG	0x004UL	/* Config Register */
-#define GREG_STAT	0x108UL	/* Status          */
-#define GREG_IMASK	0x10cUL	/* Interrupt Mask  */
-#define GREG_REG_SIZE	0x110UL
+#define GREG_STAT	0x100UL	/* Status          */
+#define GREG_IMASK	0x104UL	/* Interrupt Mask  */
+#define GREG_REG_SIZE	0x108UL
 
 /* Global reset register. */
 #define GREG_RESET_ETX         0x01

From 7ad813f208533cebfcc32d3d7474dc1677d1b09a Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 28 Jul 2017 11:58:36 -0700
Subject: [PATCH 68/76] net: phy: Correctly process PHY_HALTED in
 phy_stop_machine()

Marc reported that he was not getting the PHY library adjust_link()
callback function to run when calling phy_stop() + phy_disconnect()
which does not indeed happen because we set the state machine to
PHY_HALTED but we don't get to run it to process this state past that
point.

Fix this with a synchronous call to phy_state_machine() in order to have
the state machine actually act on PHY_HALTED, set the PHY device's link
down, turn the network device's carrier off and finally call the
adjust_link() function.

Reported-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Fixes: a390d1f379cf ("phylib: convert state_queue work to delayed_work")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index d0626bf5c540..5068c582d502 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -749,6 +749,9 @@ void phy_stop_machine(struct phy_device *phydev)
 	if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
 		phydev->state = PHY_UP;
 	mutex_unlock(&phydev->lock);
+
+	/* Now we can run the state machine synchronously */
+	phy_state_machine(&phydev->state_queue.work);
 }
 
 /**

From 71ed7ee35ad2c5300f4b51634185a0193b4fb0fa Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 28 Jul 2017 23:27:44 +0300
Subject: [PATCH 69/76] ipv4: fib: Fix NULL pointer deref during
 fib_sync_down_dev()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Michał reported a NULL pointer deref during fib_sync_down_dev() when
unregistering a netdevice. The problem is that we don't check for
'in_dev' being NULL, which can happen in very specific cases.

Usually routes are flushed upon NETDEV_DOWN sent in either the netdev or
the inetaddr notification chains. However, if an interface isn't
configured with any IP address, then it's possible for host routes to be
flushed following NETDEV_UNREGISTER, after NULLing dev->ip_ptr in
inetdev_destroy().

To reproduce:
$ ip link add type dummy
$ ip route add local 1.1.1.0/24 dev dummy0
$ ip link del dev dummy0

Fix this by checking for the presence of 'in_dev' before referencing it.

Fixes: 982acb97560c ("ipv4: fib: Notify about nexthop status changes")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Tested-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 222100103808..b8d18171cca3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1452,7 +1452,7 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
 		return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
 					  &info.info);
 	case FIB_EVENT_NH_DEL:
-		if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+		if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
 		     fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
 		    (fib_nh->nh_flags & RTNH_F_DEAD))
 			return call_fib_notifiers(dev_net(fib_nh->nh_dev),

From 13332db54fb5a821c86432238d33be1ff0429394 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 31 Jul 2017 09:47:50 -0700
Subject: [PATCH 70/76] MAINTAINERS: Add more files to the PHY LIBRARY section

Include missing files that are provided by, used, or directly maintained
within the PHY LIBRARY, this include uapi header, header files used by
Device Tree code etc.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 297e610c9163..a7a392a74859 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5138,12 +5138,20 @@ M:	Andrew Lunn <andrew@lunn.ch>
 M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	include/linux/phy.h
-F:	include/linux/phy_fixed.h
-F:	drivers/net/phy/
+F:	Documentation/ABI/testing/sysfs-bus-mdio
+F:	Documentation/devicetree/bindings/net/mdio*
 F:	Documentation/networking/phy.txt
+F:	drivers/net/phy/
 F:	drivers/of/of_mdio.c
 F:	drivers/of/of_net.c
+F:	include/linux/*mdio*.h
+F:	include/linux/of_net.h
+F:	include/linux/phy.h
+F:	include/linux/phy_fixed.h
+F:	include/linux/platform_data/mdio-gpio.h
+F:	include/trace/events/mdio.h
+F:	include/uapi/linux/mdio.h
+F:	include/uapi/linux/mii.h
 
 EXT2 FILE SYSTEM
 M:	Jan Kara <jack@suse.com>

From cfbcb61f6c2ac9aeb7731d2ec59fe3bf53786c49 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 29 Jul 2017 22:18:41 +0300
Subject: [PATCH 71/76] mv643xx_eth: fix of_irq_to_resource() error check

of_irq_to_resource() has recently been  fixed to return negative error #'s
along with 0 in case of failure,  however the Marvell MV643xx Ethernet
driver still only regards 0  as invalid IRQ -- fix it up.

Fixes: 7a4228bbff76 ("of: irq: use of_irq_get() in of_irq_to_resource()")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 5794d98d946f..9c94ea9b2b80 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2734,7 +2734,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
 	ppd.shared = pdev;
 
 	memset(&res, 0, sizeof(res));
-	if (!of_irq_to_resource(pnp, 0, &res)) {
+	if (of_irq_to_resource(pnp, 0, &res) <= 0) {
 		dev_err(&pdev->dev, "missing interrupt on %s\n", pnp->name);
 		return -EINVAL;
 	}

From 1daa8790d0280d2c719658e39bd59fce65efa909 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 31 Jul 2017 21:49:49 +0300
Subject: [PATCH 72/76] virtio_net: fix truesize for mergeable buffers

Seth Forshee noticed a performance degradation with some workloads.
This turns out to be due to packet drops.  Euan Kemp noticed that this
is because we drop all packets where length exceeds the truesize, but
for some packets we add in extra memory without updating the truesize.
This in turn was kept around unchanged from ab7db91705e95 ("virtio-net:
auto-tune mergeable rx buffer size for improved performance").  That
commit had an internal reason not to account for the extra space: not
enough bits to do it.  No longer true so let's account for the allocated
length exactly.

Many thanks to Seth Forshee for the report and bisecting and Euan Kemp
for debugging the issue.

Fixes: 680557cf79f8 ("virtio_net: rework mergeable buffer handling")
Reported-by: Euan Kemp <euan.kemp@coreos.com>
Tested-by: Euan Kemp <euan.kemp@coreos.com>
Reported-by: Seth Forshee <seth.forshee@canonical.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 99a26a9efec1..075dc18829ab 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -889,21 +889,20 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
-	ctx = (void *)(unsigned long)len;
 	get_page(alloc_frag->page);
 	alloc_frag->offset += len + headroom;
 	hole = alloc_frag->size - alloc_frag->offset;
 	if (hole < len + headroom) {
 		/* To avoid internal fragmentation, if there is very likely not
 		 * enough space for another buffer, add the remaining space to
-		 * the current buffer. This extra space is not included in
-		 * the truesize stored in ctx.
+		 * the current buffer.
 		 */
 		len += hole;
 		alloc_frag->offset += hole;
 	}
 
 	sg_init_one(rq->sg, buf, len);
+	ctx = (void *)(unsigned long)len;
 	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));

From 00d51094b8a5856ae790048a332ae24f1dd7c994 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 31 Jul 2017 11:05:32 -0700
Subject: [PATCH 73/76] Revert "net: bcmgenet: Remove init parameter from
 bcmgenet_mii_config"

This reverts commit 28b45910ccda ("net: bcmgenet: Remove init parameter
from bcmgenet_mii_config") because in the process of moving from
dev_info() to dev_info_once() we essentially lost the helpful printed
messages once the second instance of the driver is loaded.
dev_info_once() does not actually print the message once per device
instance, but once period.

Fixes: 28b45910ccda ("net: bcmgenet: Remove init parameter from bcmgenet_mii_config")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +-
 drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +-
 drivers/net/ethernet/broadcom/genet/bcmmii.c   | 7 ++++---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 7b0b399aaedd..a981c4ee9d72 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3669,7 +3669,7 @@ static int bcmgenet_resume(struct device *d)
 
 	phy_init_hw(priv->phydev);
 	/* Speed settings must be restored */
-	bcmgenet_mii_config(priv->dev);
+	bcmgenet_mii_config(priv->dev, false);
 
 	/* disable ethernet MAC while updating its registers */
 	umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index b9344de669f8..3a34fdba5301 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -698,7 +698,7 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF);
 
 /* MDIO routines */
 int bcmgenet_mii_init(struct net_device *dev);
-int bcmgenet_mii_config(struct net_device *dev);
+int bcmgenet_mii_config(struct net_device *dev, bool init);
 int bcmgenet_mii_probe(struct net_device *dev);
 void bcmgenet_mii_exit(struct net_device *dev);
 void bcmgenet_mii_reset(struct net_device *dev);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 071fcbd14e6a..30cb97b4a1d7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -238,7 +238,7 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 					  bcmgenet_fixed_phy_link_update);
 }
 
-int bcmgenet_mii_config(struct net_device *dev)
+int bcmgenet_mii_config(struct net_device *dev, bool init)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	struct phy_device *phydev = priv->phydev;
@@ -327,7 +327,8 @@ int bcmgenet_mii_config(struct net_device *dev)
 		bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
 	}
 
-	dev_info_once(kdev, "configuring instance for %s\n", phy_name);
+	if (init)
+		dev_info(kdev, "configuring instance for %s\n", phy_name);
 
 	return 0;
 }
@@ -375,7 +376,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	 * PHY speed which is needed for bcmgenet_mii_config() to configure
 	 * things appropriately.
 	 */
-	ret = bcmgenet_mii_config(dev);
+	ret = bcmgenet_mii_config(dev, true);
 	if (ret) {
 		phy_disconnect(priv->phydev);
 		return ret;

From ddab82821fa633da768879fc515e76dcb1207151 Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Mon, 31 Jul 2017 18:07:38 +0800
Subject: [PATCH 74/76] ppp: Fix a scheduling-while-atomic bug in del_chan

The PPTP set the pptp_sock_destruct as the sock's sk_destruct, it would
trigger this bug when __sk_free is invoked in atomic context, because of
the call path pptp_sock_destruct->del_chan->synchronize_rcu.

Now move the synchronize_rcu to pptp_release from del_chan. This is the
only one case which would free the sock and need the synchronize_rcu.

The following is the panic I met with kernel 3.3.8, but this issue should
exist in current kernel too according to the codes.

BUG: scheduling while atomic
__schedule_bug+0x5e/0x64
__schedule+0x55/0x580
? ppp_unregister_channel+0x1cd5/0x1de0 [ppp_generic]
? dev_hard_start_xmit+0x423/0x530
? sch_direct_xmit+0x73/0x170
__cond_resched+0x16/0x30
_cond_resched+0x22/0x30
wait_for_common+0x18/0x110
? call_rcu_bh+0x10/0x10
wait_for_completion+0x12/0x20
wait_rcu_gp+0x34/0x40
? wait_rcu_gp+0x40/0x40
synchronize_sched+0x1e/0x20
0xf8417298
0xf8417484
? sock_queue_rcv_skb+0x109/0x130
__sk_free+0x16/0x110
? udp_queue_rcv_skb+0x1f2/0x290
sk_free+0x16/0x20
__udp4_lib_rcv+0x3b8/0x650

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pptp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index eac499c58aa7..6dde9a0cfe76 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -131,7 +131,6 @@ static void del_chan(struct pppox_sock *sock)
 	clear_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap);
 	RCU_INIT_POINTER(callid_sock[sock->proto.pptp.src_addr.call_id], NULL);
 	spin_unlock(&chan_lock);
-	synchronize_rcu();
 }
 
 static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
@@ -520,6 +519,7 @@ static int pptp_release(struct socket *sock)
 
 	po = pppox_sk(sk);
 	del_chan(po);
+	synchronize_rcu();
 
 	pppox_unbind_sock(sk);
 	sk->sk_state = PPPOX_DEAD;

From cb891fa6a1d5f52c5f5c07b6f7f4c6d65ea55fc0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 31 Jul 2017 16:52:36 +0200
Subject: [PATCH 75/76] udp6: fix jumbogram reception

Since commit 67a51780aebb ("ipv6: udp: leverage scratch area
helpers") udp6_recvmsg() read the skb len from the scratch area,
to avoid a cache miss.
But the UDP6 rx path support RFC 2675 UDPv6 jumbograms, and their
length exceeds the 16 bits available in the scratch area. As a side
effect the length returned by recvmsg() is:
<ingress datagram len> % (1<<16)

This commit addresses the issue allocating one more bit in the
IP6CB flags field and setting it for incoming jumbograms.
Such field is still in the first cacheline, so at recvmsg()
time we can check it and fallback to access skb->len if
required, without a measurable overhead.

Fixes: 67a51780aebb ("ipv6: udp: leverage scratch area helpers")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h |  6 ++++++
 net/ipv6/exthdrs.c   |  1 +
 net/ipv6/udp.c       | 11 ++++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e1b442996f81..474d6bbc158c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -128,6 +128,7 @@ struct inet6_skb_parm {
 #define IP6SKB_FRAGMENTED      16
 #define IP6SKB_HOPBYHOP        32
 #define IP6SKB_L3SLAVE         64
+#define IP6SKB_JUMBOGRAM      128
 };
 
 #if defined(CONFIG_NET_L3_MASTER_DEV)
@@ -152,6 +153,11 @@ static inline int inet6_iif(const struct sk_buff *skb)
 	return l3_slave ? skb->skb_iif : IP6CB(skb)->iif;
 }
 
+static inline bool inet6_is_jumbogram(const struct sk_buff *skb)
+{
+	return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM);
+}
+
 /* can not be used in TCP layer after tcp_v6_fill_cb */
 static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
 {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4996d734f1d2..3cec529c6113 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -756,6 +756,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 	if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
 		goto drop;
 
+	IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM;
 	return true;
 
 drop:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 98fe4560e24c..578142b7ca3e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,15 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
 EXPORT_SYMBOL_GPL(udp6_lib_lookup);
 #endif
 
+/* do not use the scratch area len for jumbogram: their length execeeds the
+ * scratch area space; note that the IP6CB flags is still in the first
+ * cacheline, so checking for jumbograms is cheap
+ */
+static int udp6_skb_len(struct sk_buff *skb)
+{
+	return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
+}
+
 /*
  *	This should be easy, if there is something there we
  *	return it, otherwise we block.
@@ -358,7 +367,7 @@ try_again:
 	if (!skb)
 		return err;
 
-	ulen = udp_skb_len(skb);
+	ulen = udp6_skb_len(skb);
 	copied = len;
 	if (copied > ulen - off)
 		copied = ulen - off;

From cc75f8514db6a3aec517760fccaf954e5b46478c Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Mon, 31 Jul 2017 14:40:50 -0700
Subject: [PATCH 76/76] samples/bpf: fix bpf tunnel cleanup

test_tunnel_bpf.sh fails to remove the vxlan11 tunnel device, causing the
next geneve tunnelling test case fails.  In addition, the geneve reserved bit
in tcbpf2_kern.c should be zero, according to the RFC.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/tcbpf2_kern.c      | 4 ++--
 samples/bpf/test_tunnel_bpf.sh | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 9c823a609e75..270edcc149a1 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -147,9 +147,9 @@ int _geneve_set_tunnel(struct __sk_buff *skb)
 	__builtin_memset(&gopt, 0x0, sizeof(gopt));
 	gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */
 	gopt.type = 0x08;
-	gopt.r1 = 1;
+	gopt.r1 = 0;
 	gopt.r2 = 0;
-	gopt.r3 = 1;
+	gopt.r3 = 0;
 	gopt.length = 2; /* 4-byte multiple */
 	*(int *) &gopt.opt_data = 0xdeadbeef;
 
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 1ff634f187b7..a70d2ea90313 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -149,6 +149,7 @@ function cleanup {
 	ip link del veth1
 	ip link del ipip11
 	ip link del gretap11
+	ip link del vxlan11
 	ip link del geneve11
 	pkill tcpdump
 	pkill cat