From bb57768215fb6b557a1ef95a3d87b011efe2e267 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 2 Jun 2020 22:26:38 +0200 Subject: [PATCH 001/131] mt76: add missing lock configuring coverage class Coverage class callback can potentially run in parallel with other routines (e.g. mt7615_set_channel) that configures timing registers. Run coverage class callback holding mt76 mutex Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 ++ drivers/net/wireless/mediatek/mt76/mt7615/main.c | 3 +++ drivers/net/wireless/mediatek/mt76/mt7915/main.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 26cb711b465f38..83dfa6da4761ed 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -642,8 +642,10 @@ mt7603_set_coverage_class(struct ieee80211_hw *hw, s16 coverage_class) { struct mt7603_dev *dev = hw->priv; + mutex_lock(&dev->mt76.mutex); dev->coverage_class = max_t(s16, coverage_class, 0); mt7603_mac_set_timing(dev); + mutex_unlock(&dev->mt76.mutex); } static void mt7603_tx(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index c26f99b368d934..52d6544698fee4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -735,9 +735,12 @@ static void mt7615_set_coverage_class(struct ieee80211_hw *hw, s16 coverage_class) { struct mt7615_phy *phy = mt7615_hw_phy(hw); + struct mt7615_dev *dev = phy->dev; + mutex_lock(&dev->mt76.mutex); phy->coverage_class = max_t(s16, coverage_class, 0); mt7615_mac_set_timing(phy); + mutex_unlock(&dev->mt76.mutex); } static int diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 0575c259f2459f..05b5650c56c82d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -716,9 +716,12 @@ static void mt7915_set_coverage_class(struct ieee80211_hw *hw, s16 coverage_class) { struct mt7915_phy *phy = mt7915_hw_phy(hw); + struct mt7915_dev *dev = phy->dev; + mutex_lock(&dev->mt76.mutex); phy->coverage_class = max_t(s16, coverage_class, 0); mt7915_mac_set_timing(phy); + mutex_unlock(&dev->mt76.mutex); } static int From d941f47caa386931c3b598ad1b43d5ddd65869aa Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 30 May 2020 23:51:27 +0200 Subject: [PATCH 002/131] mt76: mt7615: fix lmac queue debugsfs entry acs and wmm index are swapped in mt7615_queues_acq respect to the hw design Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c index fd3ef483a87ce7..d06afcf46d67cc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c @@ -234,10 +234,11 @@ mt7615_queues_acq(struct seq_file *s, void *data) int i; for (i = 0; i < 16; i++) { - int j, acs = i / 4, index = i % 4; + int j, wmm_idx = i % MT7615_MAX_WMM_SETS; + int acs = i / MT7615_MAX_WMM_SETS; u32 ctrl, val, qlen = 0; - val = mt76_rr(dev, MT_PLE_AC_QEMPTY(acs, index)); + val = mt76_rr(dev, MT_PLE_AC_QEMPTY(acs, wmm_idx)); ctrl = BIT(31) | BIT(15) | (acs << 8); for (j = 0; j < 32; j++) { @@ -245,11 +246,11 @@ mt7615_queues_acq(struct seq_file *s, void *data) continue; mt76_wr(dev, MT_PLE_FL_Q0_CTRL, - ctrl | (j + (index << 5))); + ctrl | (j + (wmm_idx << 5))); qlen += mt76_get_field(dev, MT_PLE_FL_Q3_CTRL, GENMASK(11, 0)); } - seq_printf(s, "AC%d%d: queued=%d\n", acs, index, qlen); + seq_printf(s, "AC%d%d: queued=%d\n", wmm_idx, acs, qlen); } return 0; From a07292ee144b1c4f4269871bb138d16cf5dc603c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 31 May 2020 00:32:04 +0200 Subject: [PATCH 003/131] mt76: mt7615: fix hw queue mapping mt7622/mt7663 chipsets rely on a fixed reverse queue map order respect to mac80211 one: - q(0): IEEE80211_AC_BK - q(1): IEEE80211_AC_BE - q(2): IEEE80211_AC_VI - q(3): IEEE80211_AC_VO Fixes: cdad4874057d ("mt76: mt7615: add dma and tx queue initialization for MT7622") Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support") Co-developed-by: Sean Wang Signed-off-by: Sean Wang Co-developed-by: Ryder Lee Signed-off-by: Ryder Lee Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7615/dma.c | 8 ++--- .../net/wireless/mediatek/mt76/mt7615/mac.c | 2 +- .../net/wireless/mediatek/mt76/mt7615/mac.h | 15 ---------- .../net/wireless/mediatek/mt76/mt7615/main.c | 1 + .../wireless/mediatek/mt76/mt7615/mt7615.h | 30 +++++++++++++++++++ drivers/net/wireless/mediatek/mt76/usb.c | 17 ++++++----- 6 files changed, 45 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c index 5a124610d4af02..83fdcf5db3c792 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c @@ -36,10 +36,10 @@ static int mt7622_init_tx_queues_multi(struct mt7615_dev *dev) { static const u8 wmm_queue_map[] = { - MT7622_TXQ_AC0, - MT7622_TXQ_AC1, - MT7622_TXQ_AC2, - MT7622_TXQ_AC3, + [IEEE80211_AC_BK] = MT7622_TXQ_AC0, + [IEEE80211_AC_BE] = MT7622_TXQ_AC1, + [IEEE80211_AC_VI] = MT7622_TXQ_AC2, + [IEEE80211_AC_VO] = MT7622_TXQ_AC3, }; int ret; int i; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 9f1c6ca7a665bc..5990355bc731e2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -528,7 +528,7 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi, if (ieee80211_is_data(fc) || ieee80211_is_bufferable_mmpdu(fc)) { q_idx = wmm_idx * MT7615_MAX_WMM_SETS + - skb_get_queue_mapping(skb); + mt7615_lmac_mapping(dev, skb_get_queue_mapping(skb)); p_fmt = is_usb ? MT_TX_TYPE_SF : MT_TX_TYPE_CT; } else if (beacon) { if (ext_phy) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h index f0d4b29a52a240..81608ab656b8ef 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h @@ -124,21 +124,6 @@ enum tx_pkt_type { MT_TX_TYPE_FW, }; -enum tx_pkt_queue_idx { - MT_LMAC_AC00, - MT_LMAC_AC01, - MT_LMAC_AC02, - MT_LMAC_AC03, - MT_LMAC_ALTX0 = 0x10, - MT_LMAC_BMC0, - MT_LMAC_BCN0, - MT_LMAC_PSMP0, - MT_LMAC_ALTX1, - MT_LMAC_BMC1, - MT_LMAC_BCN1, - MT_LMAC_PSMP1, -}; - enum tx_port_idx { MT_TX_PORT_IDX_LMAC, MT_TX_PORT_IDX_MCU diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 52d6544698fee4..beaca812768082 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -397,6 +397,7 @@ mt7615_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue, struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv; struct mt7615_dev *dev = mt7615_hw_dev(hw); + queue = mt7615_lmac_mapping(dev, queue); queue += mvif->wmm_idx * MT7615_MAX_WMM_SETS; return mt7615_mcu_set_wmm(dev, queue, params); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index d6176d316bee26..3e7d51bf42a4d3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -282,6 +282,21 @@ struct mt7615_dev { struct list_head wd_head; }; +enum tx_pkt_queue_idx { + MT_LMAC_AC00, + MT_LMAC_AC01, + MT_LMAC_AC02, + MT_LMAC_AC03, + MT_LMAC_ALTX0 = 0x10, + MT_LMAC_BMC0, + MT_LMAC_BCN0, + MT_LMAC_PSMP0, + MT_LMAC_ALTX1, + MT_LMAC_BMC1, + MT_LMAC_BCN1, + MT_LMAC_PSMP1, +}; + enum { HW_BSSID_0 = 0x0, HW_BSSID_1, @@ -447,6 +462,21 @@ static inline u16 mt7615_wtbl_size(struct mt7615_dev *dev) return MT7615_WTBL_SIZE; } +static inline u8 mt7615_lmac_mapping(struct mt7615_dev *dev, u8 ac) +{ + static const u8 lmac_queue_map[] = { + [IEEE80211_AC_BK] = MT_LMAC_AC00, + [IEEE80211_AC_BE] = MT_LMAC_AC01, + [IEEE80211_AC_VI] = MT_LMAC_AC02, + [IEEE80211_AC_VO] = MT_LMAC_AC03, + }; + + if (WARN_ON_ONCE(ac >= ARRAY_SIZE(lmac_queue_map))) + return MT_LMAC_AC01; /* BE */ + + return lmac_queue_map[ac]; +} + void mt7615_dma_reset(struct mt7615_dev *dev); void mt7615_scan_work(struct work_struct *work); void mt7615_roc_work(struct work_struct *work); diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index fb97ea25b4d436..27abcdb61a064f 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -1010,17 +1010,18 @@ static void mt76u_tx_kick(struct mt76_dev *dev, struct mt76_queue *q) static u8 mt76u_ac_to_hwq(struct mt76_dev *dev, u8 ac) { if (mt76_chip(dev) == 0x7663) { - static const u8 wmm_queue_map[] = { - [IEEE80211_AC_VO] = 0, - [IEEE80211_AC_VI] = 1, - [IEEE80211_AC_BE] = 2, - [IEEE80211_AC_BK] = 4, + static const u8 lmac_queue_map[] = { + /* ac to lmac mapping */ + [IEEE80211_AC_BK] = 0, + [IEEE80211_AC_BE] = 1, + [IEEE80211_AC_VI] = 2, + [IEEE80211_AC_VO] = 4, }; - if (WARN_ON(ac >= ARRAY_SIZE(wmm_queue_map))) - return 2; /* BE */ + if (WARN_ON(ac >= ARRAY_SIZE(lmac_queue_map))) + return 1; /* BE */ - return wmm_queue_map[ac]; + return lmac_queue_map[ac]; } return mt76_ac_to_hwq(ac); From 7f88314321e20744114bc596b6528bb9d57ff46f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 1 Jun 2020 09:45:33 +0200 Subject: [PATCH 004/131] mt76: overwrite qid for non-bufferable mgmt frames Overwrite hw queue id for non-bufferable management frames if the hw support always txq (altxq) in order to be in sync with mac txwi code Fixes: cdad4874057d ("mt76: mt7615: add dma and tx queue initialization for MT7622") Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support") Suggested-by: Felix Fietkau Tested-by: Sean Wang Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + .../net/wireless/mediatek/mt76/mt7615/dma.c | 1 + .../net/wireless/mediatek/mt76/mt7615/mac.c | 20 +++++++------------ .../net/wireless/mediatek/mt76/mt7615/mmio.c | 2 +- .../net/wireless/mediatek/mt76/mt7615/usb.c | 2 +- drivers/net/wireless/mediatek/mt76/tx.c | 7 +++++++ 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index dfe625a53c6319..3d7db6ffb59987 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -301,6 +301,7 @@ struct mt76_hw_cap { #define MT_DRV_TX_ALIGNED4_SKBS BIT(1) #define MT_DRV_SW_RX_AIRTIME BIT(2) #define MT_DRV_RX_DMA_HDR BIT(3) +#define MT_DRV_HW_MGMT_TXQ BIT(4) struct mt76_driver_ops { u32 drv_flags; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c index 83fdcf5db3c792..e5a965df899a17 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c @@ -100,6 +100,7 @@ mt7615_tx_cleanup(struct mt7615_dev *dev) int i; mt76_queue_tx_cleanup(dev, MT_TXQ_MCU, false); + mt76_queue_tx_cleanup(dev, MT_TXQ_PSD, false); if (is_mt7615(&dev->mt76)) { mt76_queue_tx_cleanup(dev, MT_TXQ_BE, false); } else { diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 5990355bc731e2..d97315ec72658e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -526,22 +526,16 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi, fc_type = (le16_to_cpu(fc) & IEEE80211_FCTL_FTYPE) >> 2; fc_stype = (le16_to_cpu(fc) & IEEE80211_FCTL_STYPE) >> 4; - if (ieee80211_is_data(fc) || ieee80211_is_bufferable_mmpdu(fc)) { - q_idx = wmm_idx * MT7615_MAX_WMM_SETS + - mt7615_lmac_mapping(dev, skb_get_queue_mapping(skb)); - p_fmt = is_usb ? MT_TX_TYPE_SF : MT_TX_TYPE_CT; - } else if (beacon) { - if (ext_phy) - q_idx = MT_LMAC_BCN1; - else - q_idx = MT_LMAC_BCN0; + if (beacon) { p_fmt = MT_TX_TYPE_FW; + q_idx = ext_phy ? MT_LMAC_BCN1 : MT_LMAC_BCN0; + } else if (skb_get_queue_mapping(skb) >= MT_TXQ_PSD) { + p_fmt = is_usb ? MT_TX_TYPE_SF : MT_TX_TYPE_CT; + q_idx = ext_phy ? MT_LMAC_ALTX1 : MT_LMAC_ALTX0; } else { - if (ext_phy) - q_idx = MT_LMAC_ALTX1; - else - q_idx = MT_LMAC_ALTX0; p_fmt = is_usb ? MT_TX_TYPE_SF : MT_TX_TYPE_CT; + q_idx = wmm_idx * MT7615_MAX_WMM_SETS + + mt7615_lmac_mapping(dev, skb_get_queue_mapping(skb)); } val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len + sz_txd) | diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c index e670393506f0b4..2e99845b9c96c6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c @@ -146,7 +146,7 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base, static const struct mt76_driver_ops drv_ops = { /* txwi_size = txd size + txp size */ .txwi_size = MT_TXD_SIZE + sizeof(struct mt7615_txp_common), - .drv_flags = MT_DRV_TXWI_NO_FREE, + .drv_flags = MT_DRV_TXWI_NO_FREE | MT_DRV_HW_MGMT_TXQ, .survey_flags = SURVEY_INFO_TIME_TX | SURVEY_INFO_TIME_RX | SURVEY_INFO_TIME_BSS_RX, diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c index a50077eb24d75f..f5dc1b8285186a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c @@ -270,7 +270,7 @@ static int mt7663u_probe(struct usb_interface *usb_intf, { static const struct mt76_driver_ops drv_ops = { .txwi_size = MT_USB_TXD_SIZE, - .drv_flags = MT_DRV_RX_DMA_HDR, + .drv_flags = MT_DRV_RX_DMA_HDR | MT_DRV_HW_MGMT_TXQ, .tx_prepare_skb = mt7663u_tx_prepare_skb, .tx_complete_skb = mt7663u_tx_complete_skb, .tx_status_data = mt7663u_tx_status_data, diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index fca38ea2441f79..f10c98aa883c0e 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -264,6 +264,13 @@ mt76_tx(struct mt76_phy *phy, struct ieee80211_sta *sta, skb_set_queue_mapping(skb, qid); } + if ((dev->drv->drv_flags & MT_DRV_HW_MGMT_TXQ) && + !ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) { + qid = MT_TXQ_PSD; + skb_set_queue_mapping(skb, qid); + } + if (!(wcid->tx_info & MT_WCID_TX_INFO_SET)) ieee80211_get_tx_rates(info->control.vif, sta, skb, info->control.rates, 1); From fbb1461ad1d6eacca9beb69a2f3ce1b5398d399b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 Apr 2020 11:29:55 +0300 Subject: [PATCH 005/131] iwlwifi: mvm: don't call iwl_mvm_free_inactive_queue() under RCU iwl_mvm_free_inactive_queue() will sleep in synchronize_net() under some circumstances, so don't call it under RCU. There doesn't appear to be a need for RCU protection around this particular call. Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200403112332.0f49448c133d.I17fd308bc4a9491859c9b112f4eb5d2c3fc18d7d@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index fee01cbbd3ace8..27977992fd7fa2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1189,17 +1189,15 @@ static int iwl_mvm_inactivity_check(struct iwl_mvm *mvm, u8 alloc_for_sta) for_each_set_bit(i, &changetid_queues, IWL_MAX_HW_QUEUES) iwl_mvm_change_queue_tid(mvm, i); + rcu_read_unlock(); + if (free_queue >= 0 && alloc_for_sta != IWL_MVM_INVALID_STA) { ret = iwl_mvm_free_inactive_queue(mvm, free_queue, queue_owner, alloc_for_sta); - if (ret) { - rcu_read_unlock(); + if (ret) return ret; - } } - rcu_read_unlock(); - return free_queue; } From ea0cca61d628662e4a1b26c77c7646f9a0257069 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 12 Jun 2020 09:38:00 +0200 Subject: [PATCH 006/131] iwlwifi: fix crash in iwl_dbg_tlv_alloc_trigger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tlv passed to iwl_dbg_tlv_alloc_trigger comes from a loaded firmware file. The memory can be marked as read-only as firmware could be shared. In anyway, writing to this memory is not expected. So, iwl_dbg_tlv_alloc_trigger can crash now: BUG: unable to handle page fault for address: ffffae2c01bfa794 PF: supervisor write access in kernel mode PF: error_code(0x0003) - permissions violation PGD 107d51067 P4D 107d51067 PUD 107d52067 PMD 659ad2067 PTE 8000000662298161 CPU: 2 PID: 161 Comm: kworker/2:1 Not tainted 5.7.0-3.gad96a07-default #1 openSUSE Tumbleweed (unreleased) RIP: 0010:iwl_dbg_tlv_alloc_trigger+0x25/0x60 [iwlwifi] Code: eb f2 0f 1f 00 66 66 66 66 90 83 7e 04 33 48 89 f8 44 8b 46 10 48 89 f7 76 40 41 8d 50 ff 83 fa 19 77 23 8b 56 20 85 d2 75 07 46 20 ff ff ff ff 4b 8d 14 40 48 c1 e2 04 48 8d b4 10 00 05 00 RSP: 0018:ffffae2c00417ce8 EFLAGS: 00010246 RAX: ffff8f0522334018 RBX: ffff8f0522334018 RCX: ffffffffc0fc26c0 RDX: 0000000000000000 RSI: ffffae2c01bfa774 RDI: ffffae2c01bfa774 RBP: 0000000000000000 R08: 0000000000000004 R09: 0000000000000001 R10: 0000000000000034 R11: ffffae2c01bfa77c R12: ffff8f0522334230 R13: 0000000001000009 R14: ffff8f0523fdbc00 R15: ffff8f051f395800 FS: 0000000000000000(0000) GS:ffff8f0527c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffae2c01bfa794 CR3: 0000000389eba000 CR4: 00000000000006e0 Call Trace: iwl_dbg_tlv_alloc+0x79/0x120 [iwlwifi] iwl_parse_tlv_firmware.isra.0+0x57d/0x1550 [iwlwifi] iwl_req_fw_callback+0x3f8/0x6a0 [iwlwifi] request_firmware_work_func+0x47/0x90 process_one_work+0x1e3/0x3b0 worker_thread+0x46/0x340 kthread+0x115/0x140 ret_from_fork+0x1f/0x40 As can be seen, write bit is not set in the PTE. Read of trig->occurrences succeeds in iwl_dbg_tlv_alloc_trigger, but trig->occurrences = cpu_to_le32(-1); fails there, obviously. This is likely because we (at SUSE) use compressed firmware and that is marked as RO after decompression (see fw_map_paged_buf). Fix it by creating a temporary buffer in case we need to change the memory. Signed-off-by: Jiri Slaby Reported-by: Dieter Nützel Tested-by: Dieter Nützel Cc: Johannes Berg Cc: Emmanuel Grumbach Cc: Luca Coelho Cc: Intel Linux Wireless Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200612073800.27742-1-jslaby@suse.cz --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 7987a288917bac..27116c7d3f4f86 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -271,6 +271,8 @@ static int iwl_dbg_tlv_alloc_trigger(struct iwl_trans *trans, { struct iwl_fw_ini_trigger_tlv *trig = (void *)tlv->data; u32 tp = le32_to_cpu(trig->time_point); + struct iwl_ucode_tlv *dup = NULL; + int ret; if (le32_to_cpu(tlv->length) < sizeof(*trig)) return -EINVAL; @@ -283,10 +285,20 @@ static int iwl_dbg_tlv_alloc_trigger(struct iwl_trans *trans, return -EINVAL; } - if (!le32_to_cpu(trig->occurrences)) + if (!le32_to_cpu(trig->occurrences)) { + dup = kmemdup(tlv, sizeof(*tlv) + le32_to_cpu(tlv->length), + GFP_KERNEL); + if (!dup) + return -ENOMEM; + trig = (void *)dup->data; trig->occurrences = cpu_to_le32(-1); + tlv = dup; + } + + ret = iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].trig_list); + kfree(dup); - return iwl_dbg_tlv_add(tlv, &trans->dbg.time_point[tp].trig_list); + return ret; } static int (*dbg_tlv_alloc[])(struct iwl_trans *trans, From 4ac668a3b8c9d3477a3fe162c1cfeb867dd65de8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 15 Jun 2020 20:13:41 +0200 Subject: [PATCH 007/131] mt76: mt76x02: do not access uninitialized NAPI structs Fixes a crash on MMIO devices when running into the watchdog reset Fixes: d3377b78cec6 ("mt76: add HE phy modes and hardware queue") Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200615181341.81871-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index cbbe986655fe50..5fda6e7b120c20 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -456,8 +456,9 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) tasklet_disable(&dev->mt76.tx_tasklet); napi_disable(&dev->mt76.tx_napi); - for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) + mt76_for_each_q_rx(&dev->mt76, i) { napi_disable(&dev->mt76.napi[i]); + } mutex_lock(&dev->mt76.mutex); @@ -515,7 +516,7 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) tasklet_enable(&dev->mt76.pre_tbtt_tasklet); - for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) { + mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); napi_schedule(&dev->mt76.napi[i]); } From b1e79d105516fb96c39eb8a780936b87bae8e3e1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 21 Jun 2020 18:03:38 +0200 Subject: [PATCH 008/131] mt76: mt7663u: fix memory leaks in mt7663u_probe Fix the two following memory leaks in mt7663u_probe: 1- if device power-own times out, remove ieee80211 hw device. 2- if mt76u queues allocation fails, remove pending urbs. Fixes: eb99cc95c3b65 ("mt76: mt7615: introduce mt7663u support") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/e4098f0c8a9ac51997de07f38c2bcdf7042d6db1.1592755166.git.lorenzo@kernel.org --- .../net/wireless/mediatek/mt76/mt7615/usb.c | 11 +++++----- drivers/net/wireless/mediatek/mt76/usb.c | 22 ++++++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c index f5dc1b8285186a..5be6704770ad0c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c @@ -329,25 +329,26 @@ static int mt7663u_probe(struct usb_interface *usb_intf, if (!mt76_poll_msec(dev, MT_CONN_ON_MISC, MT_TOP_MISC2_FW_PWR_ON, FW_STATE_PWR_ON << 1, 500)) { dev_err(dev->mt76.dev, "Timeout for power on\n"); - return -EIO; + ret = -EIO; + goto error; } alloc_queues: ret = mt76u_alloc_mcu_queue(&dev->mt76); if (ret) - goto error; + goto error_free_q; ret = mt76u_alloc_queues(&dev->mt76); if (ret) - goto error; + goto error_free_q; ret = mt7663u_register_device(dev); if (ret) - goto error_freeq; + goto error_free_q; return 0; -error_freeq: +error_free_q: mt76u_queues_deinit(&dev->mt76); error: mt76u_deinit(&dev->mt76); diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 27abcdb61a064f..87382b2f74433d 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -1067,11 +1067,16 @@ static int mt76u_alloc_tx(struct mt76_dev *dev) static void mt76u_free_tx(struct mt76_dev *dev) { - struct mt76_queue *q; - int i, j; + int i; for (i = 0; i < IEEE80211_NUM_ACS; i++) { + struct mt76_queue *q; + int j; + q = dev->q_tx[i].q; + if (!q) + continue; + for (j = 0; j < q->ndesc; j++) usb_free_urb(q->entry[j].urb); } @@ -1079,17 +1084,22 @@ static void mt76u_free_tx(struct mt76_dev *dev) void mt76u_stop_tx(struct mt76_dev *dev) { - struct mt76_queue_entry entry; - struct mt76_queue *q; - int i, j, ret; + int ret; ret = wait_event_timeout(dev->tx_wait, !mt76_has_tx_pending(&dev->phy), HZ / 5); if (!ret) { + struct mt76_queue_entry entry; + struct mt76_queue *q; + int i, j; + dev_err(dev->dev, "timed out waiting for pending tx\n"); for (i = 0; i < IEEE80211_NUM_ACS; i++) { q = dev->q_tx[i].q; + if (!q) + continue; + for (j = 0; j < q->ndesc; j++) usb_kill_urb(q->entry[j].urb); } @@ -1101,6 +1111,8 @@ void mt76u_stop_tx(struct mt76_dev *dev) */ for (i = 0; i < IEEE80211_NUM_ACS; i++) { q = dev->q_tx[i].q; + if (!q) + continue; /* Assure we are in sync with killed tasklet. */ spin_lock_bh(&q->lock); From dc7bd30b97aac8a97eccef0ffe31f6cefb6e2c3e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Jun 2020 17:07:16 +0200 Subject: [PATCH 009/131] mt76: mt7615: fix EEPROM buffer size Avoid adding MT7615_EEPROM_SIZE twice. Rename MT7615_EEPROM_EXTRA_DATA to MT7615_EEPROM_FULL_SIZE, since it already includes MT7615_EEPROM_SIZE Fixes: ad380ad1ebbe ("mt76: mt7615: add support for applying DC offset calibration from EEPROM") Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200622150716.49622-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 3 +-- drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c index edac37e7847bad..22e4eabe6578d6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c @@ -72,8 +72,7 @@ static int mt7615_eeprom_load(struct mt7615_dev *dev, u32 addr) { int ret; - ret = mt76_eeprom_init(&dev->mt76, MT7615_EEPROM_SIZE + - MT7615_EEPROM_EXTRA_DATA); + ret = mt76_eeprom_init(&dev->mt76, MT7615_EEPROM_FULL_SIZE); if (ret < 0) return ret; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h index 40fed7adc58a92..a024dee103620d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h @@ -17,7 +17,7 @@ #define MT7615_EEPROM_TXDPD_SIZE 216 #define MT7615_EEPROM_TXDPD_COUNT (44 + 3) -#define MT7615_EEPROM_EXTRA_DATA (MT7615_EEPROM_TXDPD_OFFSET + \ +#define MT7615_EEPROM_FULL_SIZE (MT7615_EEPROM_TXDPD_OFFSET + \ MT7615_EEPROM_TXDPD_COUNT * \ MT7615_EEPROM_TXDPD_SIZE) From 0bda49f30ca48998102eb0a0b53970c3a3558be0 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Thu, 18 Jun 2020 15:10:59 -0400 Subject: [PATCH 010/131] efi/x86: Remove unused variables Commit 987053a30016 ("efi/x86: Move command-line initrd loading to efi_main") made the ramdisk_addr/ramdisk_size variables in efi_pe_entry unused, but neglected to delete them. Delete these unused variables. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 5a48d996ed7104..37e82bf397aa0b 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -361,8 +361,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, int options_size = 0; efi_status_t status; char *cmdline_ptr; - unsigned long ramdisk_addr; - unsigned long ramdisk_size; efi_system_table = sys_table_arg; From 59476f80d8781a84e25f0cbcf378ccab1ad7abf8 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Thu, 18 Jun 2020 16:43:15 -0400 Subject: [PATCH 011/131] efi/x86: Only copy upto the end of setup_header When copying the setup_header into the boot_params buffer, only the data that is actually part of the setup_header should be copied. efi_pe_entry() currently copies the entire second sector, which initializes some of the fields in boot_params beyond the setup_header with garbage (i.e. part of the real-mode boot code gets copied into those fields). This does not cause any issues currently because the fields that are overwritten are padding, BIOS EDD information that won't get used, and the E820 table which will get properly filled in later. Fix this to only copy data that is actually part of the setup_header structure. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 37e82bf397aa0b..3672539cb96eb3 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -388,8 +389,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, hdr = &boot_params->hdr; - /* Copy the second sector to boot_params */ - memcpy(&hdr->jump, image_base + 512, 512); + /* Copy the setup header from the second sector to boot_params */ + memcpy(&hdr->jump, image_base + 512, + sizeof(struct setup_header) - offsetof(struct setup_header, jump)); /* * Fill out some of the header fields ourselves because the From c1aac64ddc01112e137121a43645b96c3633c41b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 4 Jun 2020 11:20:30 +0900 Subject: [PATCH 012/131] efi/libstub/arm64: link stub lib.a conditionally Since commit 799c43415442 ("kbuild: thin archives make default for all archs"), core-y is passed to the linker with --whole-archive. Hence, the whole of stub library is linked to vmlinux. Use libs-y so that lib.a is passed after --no-whole-archive for conditional linking. The unused drivers/firmware/efi/libstub/relocate.o will be dropped for ARCH=arm64. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20200604022031.164207-1-masahiroy@kernel.org Signed-off-by: Ard Biesheuvel --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 76359cfb328a76..4621fb690d9c82 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -143,7 +143,7 @@ export TEXT_OFFSET core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) -core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make boot := arch/arm64/boot From 950accbabd4cfa83519fa920f99428bcc131c3c9 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 25 Jun 2020 16:45:06 -0700 Subject: [PATCH 013/131] efi/libstub: Fix gcc error around __umoddi3 for 32 bit builds 32bit gcc doesn't support modulo operation on 64 bit data. It results in a __umoddi3 error while building EFI for 32 bit. Use bitwise operations instead of modulo operations to fix the issue. Signed-off-by: Atish Patra Link: https://lore.kernel.org/r/20200625234516.31406-2-atish.patra@wdc.com Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/alignedmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index cc89c4d6196f64..1de9878ddd3a2f 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -44,7 +44,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, *addr = ALIGN((unsigned long)alloc_addr, align); if (slack > 0) { - int l = (alloc_addr % align) / EFI_PAGE_SIZE; + int l = (alloc_addr & (align - 1)) / EFI_PAGE_SIZE; if (l) { efi_bs_call(free_pages, alloc_addr, slack - l + 1); From 3230d95cea0515a6acf3f5ff360663de4c40fd07 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 6 Jul 2020 10:25:59 -0700 Subject: [PATCH 014/131] efi/libstub: Move the function prototypes to header file The prototype of the functions handle_kernel_image & efi_enter_kernel are defined in efi-stub.c which may result in a compiler warnings if -Wmissing-prototypes is set in gcc compiler. Move the prototype to efistub.h to make the compiler happy. Signed-off-by: Atish Patra Link: https://lore.kernel.org/r/20200706172609.25965-2-atish.patra@wdc.com Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub.c | 17 ----------------- drivers/firmware/efi/libstub/efistub.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index 3318ec3f8e5bcb..a5a405d8ab4447 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -121,23 +121,6 @@ static unsigned long get_dram_base(void) return membase; } -/* - * This function handles the architcture specific differences between arm and - * arm64 regarding where the kernel image must be loaded and any memory that - * must be reserved. On failure it is required to free all - * all allocations it has made. - */ -efi_status_t handle_kernel_image(unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image); - -asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint, - unsigned long fdt_addr, - unsigned long fdt_size); - /* * EFI entry point for the arm/arm64 EFI stubs. This is the entrypoint * that is described in the PE/COFF header. Most of the code is the same diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 2c9d42264c29bc..85050f5a1b2862 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -776,6 +776,22 @@ efi_status_t efi_load_initrd(efi_loaded_image_t *image, unsigned long *load_size, unsigned long soft_limit, unsigned long hard_limit); +/* + * This function handles the architcture specific differences between arm and + * arm64 regarding where the kernel image must be loaded and any memory that + * must be reserved. On failure it is required to free all + * all allocations it has made. + */ +efi_status_t handle_kernel_image(unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image); + +asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint, + unsigned long fdt_addr, + unsigned long fdt_size); void efi_handle_post_ebs_state(void); From f88814cc2578c121e6edef686365036db72af0ed Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 8 Jul 2020 13:01:57 +0300 Subject: [PATCH 015/131] efi/efivars: Expose RT service availability via efivars abstraction Commit bf67fad19e493b ("efi: Use more granular check for availability for variable services") introduced a check into the efivarfs, efi-pstore and other drivers that aborts loading of the module if not all three variable runtime services (GetVariable, SetVariable and GetNextVariable) are supported. However, this results in efivarfs being unavailable entirely if only SetVariable support is missing, which is only needed if you want to make any modifications. Also, efi-pstore and the sysfs EFI variable interface could be backed by another implementation of the 'efivars' abstraction, in which case it is completely irrelevant which services are supported by the EFI firmware. So make the generic 'efivars' abstraction dependent on the availibility of the GetVariable and GetNextVariable EFI runtime services, and add a helper 'efivar_supports_writes()' to find out whether the currently active efivars abstraction supports writes (and wire it up to the availability of SetVariable for the generic one). Then, use the efivar_supports_writes() helper to decide whether to permit efivarfs to be mounted read-write, and whether to enable efi-pstore or the sysfs EFI variable interface altogether. Fixes: bf67fad19e493b ("efi: Use more granular check for availability for variable services") Reported-by: Heinrich Schuchardt Acked-by: Ilias Apalodimas Tested-by: Ilias Apalodimas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi-pstore.c | 5 +---- drivers/firmware/efi/efi.c | 12 ++++++++---- drivers/firmware/efi/efivars.c | 5 +---- drivers/firmware/efi/vars.c | 6 ++++++ fs/efivarfs/super.c | 6 +++--- include/linux/efi.h | 1 + 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index c2f1d4e6630b10..feb7fe6f2da76d 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -356,10 +356,7 @@ static struct pstore_info efi_pstore_info = { static __init int efivars_pstore_init(void) { - if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) - return 0; - - if (!efivars_kobject()) + if (!efivars_kobject() || !efivar_supports_writes()) return 0; if (efivars_pstore_disable) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 5114cae4ec9735..fdd1db025dbfda 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -176,11 +176,13 @@ static struct efivar_operations generic_ops; static int generic_ops_register(void) { generic_ops.get_variable = efi.get_variable; - generic_ops.set_variable = efi.set_variable; - generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; + if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE)) { + generic_ops.set_variable = efi.set_variable; + generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; + } return efivars_register(&generic_efivars, &generic_ops, efi_kobj); } @@ -382,7 +384,8 @@ static int __init efisubsys_init(void) return -ENOMEM; } - if (efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) { + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | + EFI_RT_SUPPORTED_GET_NEXT_VARIABLE_NAME)) { efivar_ssdt_load(); error = generic_ops_register(); if (error) @@ -416,7 +419,8 @@ static int __init efisubsys_init(void) err_remove_group: sysfs_remove_group(efi_kobj, &efi_subsys_attr_group); err_unregister: - if (efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | + EFI_RT_SUPPORTED_GET_NEXT_VARIABLE_NAME)) generic_ops_unregister(); err_put: kobject_put(efi_kobj); diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 26528a46d99e9e..dcea137142b3ce 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -680,11 +680,8 @@ int efivars_sysfs_init(void) struct kobject *parent_kobj = efivars_kobject(); int error = 0; - if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) - return -ENODEV; - /* No efivars has been registered yet */ - if (!parent_kobj) + if (!parent_kobj || !efivar_supports_writes()) return 0; printk(KERN_INFO "EFI Variables Facility v%s %s\n", EFIVARS_VERSION, diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 5f2a4d162795c2..973eef234b365e 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -1229,3 +1229,9 @@ int efivars_unregister(struct efivars *efivars) return rv; } EXPORT_SYMBOL_GPL(efivars_unregister); + +int efivar_supports_writes(void) +{ + return __efivars && __efivars->ops->set_variable; +} +EXPORT_SYMBOL_GPL(efivar_supports_writes); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 12c66f5d92dd2e..28bb5689333a59 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -201,6 +201,9 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; + if (!efivar_supports_writes()) + sb->s_flags |= SB_RDONLY; + inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true); if (!inode) return -ENOMEM; @@ -252,9 +255,6 @@ static struct file_system_type efivarfs_type = { static __init int efivarfs_init(void) { - if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) - return -ENODEV; - if (!efivars_kobject()) return -ENODEV; diff --git a/include/linux/efi.h b/include/linux/efi.h index bb35f3305e5506..05c47f857383eb 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -994,6 +994,7 @@ int efivars_register(struct efivars *efivars, int efivars_unregister(struct efivars *efivars); struct kobject *efivars_kobject(void); +int efivar_supports_writes(void); int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head); From 769e0fe1171e95d90ea5a2d6d0b2bdc7d5d2e7b2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 9 Jul 2020 09:59:57 +0300 Subject: [PATCH 016/131] efi: Revert "efi/x86: Fix build with gcc 4" This reverts commit 5435f73d5c4a1b75, which is no longer needed now that the minimum GCC version has been bumped to v4.9 Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 4cce372edaf4de..75daaf20374eec 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -6,8 +6,7 @@ # enabled, even if doing so doesn't break the build. # cflags-$(CONFIG_X86_32) := -march=i386 -cflags-$(CONFIG_X86_64) := -mcmodel=small \ - $(call cc-option,-maccumulate-outgoing-args) +cflags-$(CONFIG_X86_64) := -mcmodel=small cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ \ -fPIC -fno-strict-aliasing -mno-red-zone \ -mno-mmx -mno-sse -fshort-wchar \ From 163e9ef63641a02de4c95cd921577265c52e1ce2 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 11 Jul 2020 20:48:23 -0400 Subject: [PATCH 017/131] bnxt_en: Fix race when modifying pause settings. The driver was modified to not rely on rtnl lock to protect link settings about 2 years ago. The pause setting was missed when making that change. Fix it by acquiring link_lock mutex before calling bnxt_hwrm_set_pause(). Fixes: e2dc9b6e38fa ("bnxt_en: Don't use rtnl lock to protect link change logic in workqueue.") Signed-off-by: Vasundhara Volam Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6b88143af5ea19..b4aa56dc4f9fb4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1765,8 +1765,11 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (epause->tx_pause) link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_TX; - if (netif_running(dev)) + if (netif_running(dev)) { + mutex_lock(&bp->link_lock); rc = bnxt_hwrm_set_pause(bp); + mutex_unlock(&bp->link_lock); + } return rc; } From ca0c753815fe4786b79a80abf0412eb5d52090b8 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 11 Jul 2020 20:48:24 -0400 Subject: [PATCH 018/131] bnxt_en: Init ethtool link settings after reading updated PHY configuration. In a shared port PHY configuration, async event is received when any of the port modifies the configuration. Ethtool link settings should be initialised after updated PHY configuration from firmware. Fixes: b1613e78e98d ("bnxt_en: Add async. event logic for PHY configuration changes.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6a884df44612aa..28147e41408197 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10385,15 +10385,15 @@ static void bnxt_sp_task(struct work_struct *work) &bp->sp_event)) bnxt_hwrm_phy_qcaps(bp); - if (test_and_clear_bit(BNXT_LINK_CFG_CHANGE_SP_EVENT, - &bp->sp_event)) - bnxt_init_ethtool_link_settings(bp); - rc = bnxt_update_link(bp, true); - mutex_unlock(&bp->link_lock); if (rc) netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", rc); + + if (test_and_clear_bit(BNXT_LINK_CFG_CHANGE_SP_EVENT, + &bp->sp_event)) + bnxt_init_ethtool_link_settings(bp); + mutex_unlock(&bp->link_lock); } if (test_and_clear_bit(BNXT_UPDATE_PHY_SP_EVENT, &bp->sp_event)) { int rc; From 27640ce68d21e556b66bc5fa022aacd26e53c947 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 11 Jul 2020 20:48:25 -0400 Subject: [PATCH 019/131] bnxt_en: Fix completion ring sizing with TPA enabled. The current completion ring sizing formula is wrong with TPA enabled. The formula assumes that the number of TPA completions are bound by the RX ring size, but that's not true. TPA_START completions are immediately recycled so they are not bound by the RX ring size. We must add bp->max_tpa to the worst case maximum RX and TPA completions. The completion ring can overflow because of this mistake. This will cause hardware to disable the completion ring when this happens, leading to RX and TX traffic to stall on that ring. This issue is generally exposed only when the RX ring size is set very small. Fix the formula by adding bp->max_tpa to the number of RX completions if TPA is enabled. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver."); Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 28147e41408197..7463a1847cebd6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3418,7 +3418,7 @@ void bnxt_set_tpa_flags(struct bnxt *bp) */ void bnxt_set_ring_params(struct bnxt *bp) { - u32 ring_size, rx_size, rx_space; + u32 ring_size, rx_size, rx_space, max_rx_cmpl; u32 agg_factor = 0, agg_ring_size = 0; /* 8 for CRC and VLAN */ @@ -3474,7 +3474,15 @@ void bnxt_set_ring_params(struct bnxt *bp) bp->tx_nr_pages = bnxt_calc_nr_ring_pages(ring_size, TX_DESC_CNT); bp->tx_ring_mask = (bp->tx_nr_pages * TX_DESC_CNT) - 1; - ring_size = bp->rx_ring_size * (2 + agg_factor) + bp->tx_ring_size; + max_rx_cmpl = bp->rx_ring_size; + /* MAX TPA needs to be added because TPA_START completions are + * immediately recycled, so the TPA completions are not bound by + * the RX ring size. + */ + if (bp->flags & BNXT_FLAG_TPA) + max_rx_cmpl += bp->max_tpa; + /* RX and TPA completions are 32-byte, all others are 16-byte */ + ring_size = max_rx_cmpl * 2 + agg_ring_size + bp->tx_ring_size; bp->cp_ring_size = ring_size; bp->cp_nr_pages = bnxt_calc_nr_ring_pages(ring_size, CP_DESC_CNT); From e0484010ec05191a8edf980413fc92f28050c1cc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 13 Jul 2020 13:05:13 +0200 Subject: [PATCH 020/131] usb: hso: Fix debug compile warning on sparc32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On sparc32, tcflag_t is "unsigned long", unlike on all other architectures, where it is "unsigned int": drivers/net/usb/hso.c: In function ‘hso_serial_set_termios’: include/linux/kern_levels.h:5:18: warning: format ‘%d’ expects argument of type ‘unsigned int’, but argument 4 has type ‘tcflag_t {aka long unsigned int}’ [-Wformat=] drivers/net/usb/hso.c:1393:3: note: in expansion of macro ‘hso_dbg’ hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n", ^~~~~~~ include/linux/kern_levels.h:5:18: warning: format ‘%d’ expects argument of type ‘unsigned int’, but argument 5 has type ‘tcflag_t {aka long unsigned int}’ [-Wformat=] drivers/net/usb/hso.c:1393:3: note: in expansion of macro ‘hso_dbg’ hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n", ^~~~~~~ As "unsigned long" is 32-bit on sparc32, fix this by casting all tcflag_t parameters to "unsigned int". While at it, use "%u" to format unsigned numbers. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index bb8c34d746ab33..5f123a8cf68ed9 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1390,8 +1390,9 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old) unsigned long flags; if (old) - hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n", - tty->termios.c_cflag, old->c_cflag); + hso_dbg(0x16, "Termios called with: cflags new[%u] - old[%u]\n", + (unsigned int)tty->termios.c_cflag, + (unsigned int)old->c_cflag); /* the actual setup */ spin_lock_irqsave(&serial->serial_lock, flags); From 46ef5b89ec0ecf290d74c4aee844f063933c4da4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 13 Jul 2020 23:59:50 +0800 Subject: [PATCH 021/131] ip6_gre: fix null-ptr-deref in ip6gre_init_net() KASAN report null-ptr-deref error when register_netdev() failed: KASAN: null-ptr-deref in range [0x00000000000003c0-0x00000000000003c7] CPU: 2 PID: 422 Comm: ip Not tainted 5.8.0-rc4+ #12 Call Trace: ip6gre_init_net+0x4ab/0x580 ? ip6gre_tunnel_uninit+0x3f0/0x3f0 ops_init+0xa8/0x3c0 setup_net+0x2de/0x7e0 ? rcu_read_lock_bh_held+0xb0/0xb0 ? ops_init+0x3c0/0x3c0 ? kasan_unpoison_shadow+0x33/0x40 ? __kasan_kmalloc.constprop.0+0xc2/0xd0 copy_net_ns+0x27d/0x530 create_new_namespaces+0x382/0xa30 unshare_nsproxy_namespaces+0xa1/0x1d0 ksys_unshare+0x39c/0x780 ? walk_process_tree+0x2a0/0x2a0 ? trace_hardirqs_on+0x4a/0x1b0 ? _raw_spin_unlock_irq+0x1f/0x30 ? syscall_trace_enter+0x1a7/0x330 ? do_syscall_64+0x1c/0xa0 __x64_sys_unshare+0x2d/0x40 do_syscall_64+0x56/0xa0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ip6gre_tunnel_uninit() has set 'ign->fb_tunnel_dev' to NULL, later access to ign->fb_tunnel_dev cause null-ptr-deref. Fix it by saving 'ign->fb_tunnel_dev' to local variable ndev. Fixes: dafabb6590cb ("ip6_gre: fix use-after-free in ip6gre_tunnel_lookup()") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6532bde82b40a1..3a57fb9ce0494b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1562,17 +1562,18 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) static int __net_init ip6gre_init_net(struct net *net) { struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *ndev; int err; if (!net_has_fallback_tunnels(net)) return 0; - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", - NET_NAME_UNKNOWN, - ip6gre_tunnel_setup); - if (!ign->fb_tunnel_dev) { + ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + NET_NAME_UNKNOWN, ip6gre_tunnel_setup); + if (!ndev) { err = -ENOMEM; goto err_alloc_dev; } + ign->fb_tunnel_dev = ndev; dev_net_set(ign->fb_tunnel_dev, net); /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. @@ -1592,7 +1593,7 @@ static int __net_init ip6gre_init_net(struct net *net) return 0; err_reg_dev: - free_netdev(ign->fb_tunnel_dev); + free_netdev(ndev); err_alloc_dev: return err; } From 3195c4706b00106aa82c73acd28340fa8fc2bfc1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 14 Jul 2020 13:00:27 +0200 Subject: [PATCH 022/131] hippi: Fix a size used in a 'pci_free_consistent()' in an error handling path The size used when calling 'pci_alloc_consistent()' and 'pci_free_consistent()' should match. Fix it and have it consistent with the corresponding call in 'rr_close()'. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/hippi/rrunner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 2a6ec539496661..a4b3fce69ecd96 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1242,7 +1242,7 @@ static int rr_open(struct net_device *dev) rrpriv->info = NULL; } if (rrpriv->rx_ctrl) { - pci_free_consistent(pdev, sizeof(struct ring_ctrl), + pci_free_consistent(pdev, 256 * sizeof(struct ring_ctrl), rrpriv->rx_ctrl, rrpriv->rx_ctrl_dma); rrpriv->rx_ctrl = NULL; } From ff021f22ea8f1388ced048243c6e06ca5cfbd62a Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Tue, 14 Jul 2020 15:01:04 +0300 Subject: [PATCH 023/131] gianfar: Use random MAC address when none is given If there is no valid MAC address in the device tree, use a random MAC address. Signed-off-by: Maxim Kochetkov Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index b3c69e9038eac4..b513b8c5c3b5e5 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -779,8 +779,12 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) + if (!IS_ERR(mac_addr)) { ether_addr_copy(dev->dev_addr, mac_addr); + } else { + eth_hw_addr_random(dev); + dev_info(&ofdev->dev, "Using random MAC address: %pM\n", dev->dev_addr); + } if (model && !strcasecmp(model, "TSEC")) priv->device_flags |= FSL_GIANFAR_DEV_HAS_GIGABIT | From 651149f60376758a4759f761767965040f9e4464 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Tue, 14 Jul 2020 17:40:55 +0200 Subject: [PATCH 024/131] selftests: fib_nexthop_multiprefix: fix cleanup() netns deletion During setup(): ... for ns in h0 r1 h1 h2 h3 do create_ns ${ns} done ... while in cleanup(): ... for n in h1 r1 h2 h3 h4 do ip netns del ${n} 2>/dev/null done ... and after removing the stderr redirection in cleanup(): $ sudo ./fib_nexthop_multiprefix.sh ... TEST: IPv4: host 0 to host 3, mtu 1400 [ OK ] TEST: IPv6: host 0 to host 3, mtu 1400 [ OK ] Cannot remove namespace file "/run/netns/h4": No such file or directory $ echo $? 1 and a non-zero return code, make kselftests fail (even if the test itself is fine): ... not ok 34 selftests: net: fib_nexthop_multiprefix.sh # exit=1 ... Signed-off-by: Paolo Pisati Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthop_multiprefix.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index 9dc35a16e41596..51df5e305855a7 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -144,7 +144,7 @@ setup() cleanup() { - for n in h1 r1 h2 h3 h4 + for n in h0 r1 h1 h2 h3 do ip netns del ${n} 2>/dev/null done From 1d61e21852d3161f234b9656797669fe185c251b Mon Sep 17 00:00:00 2001 From: Laurence Oberman Date: Tue, 14 Jul 2020 18:08:05 -0400 Subject: [PATCH 025/131] qed: Disable "MFW indication via attention" SPAM every 5 minutes This is likely firmware causing this but its starting to annoy customers. Change the message level to verbose to prevent the spam. Note that this seems to only show up with ISCSI enabled on the HBA via the qedi driver. Signed-off-by: Laurence Oberman Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_int.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index b7b974f0ef210e..7e13a9d9b89c69 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -1193,7 +1193,8 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) index, attn_bits, attn_acks, asserted_bits, deasserted_bits, p_sb_attn_sw->known_attn); } else if (asserted_bits == 0x100) { - DP_INFO(p_hwfn, "MFW indication via attention\n"); + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "MFW indication via attention\n"); } else { DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "MFW indication [deassertion]\n"); From 1e9451cbda456a170518b2bfd643e2cb980880bf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Jul 2020 18:51:39 +0200 Subject: [PATCH 026/131] netfilter: nf_tables: fix nat hook table deletion sybot came up with following transaction: add table ip syz0 add chain ip syz0 syz2 { type nat hook prerouting priority 0; policy accept; } add table ip syz0 { flags dormant; } delete chain ip syz0 syz2 delete table ip syz0 which yields: hook not found, pf 2 num 0 WARNING: CPU: 0 PID: 6775 at net/netfilter/core.c:413 __nf_unregister_net_hook+0x3e6/0x4a0 net/netfilter/core.c:413 [..] nft_unregister_basechain_hooks net/netfilter/nf_tables_api.c:206 [inline] nft_table_disable net/netfilter/nf_tables_api.c:835 [inline] nf_tables_table_disable net/netfilter/nf_tables_api.c:868 [inline] nf_tables_commit+0x32d3/0x4d70 net/netfilter/nf_tables_api.c:7550 nfnetlink_rcv_batch net/netfilter/nfnetlink.c:486 [inline] nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:544 [inline] nfnetlink_rcv+0x14a5/0x1e50 net/netfilter/nfnetlink.c:562 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] Problem is that when I added ability to override base hook registration to make nat basechains register with the nat core instead of netfilter core, I forgot to update nft_table_disable() to use that instead of the 'raw' hook register interface. In syzbot transaction, the basechain is of 'nat' type. Its registered with the nat core. The switch to 'dormant mode' attempts to delete from netfilter core instead. After updating nft_table_disable/enable to use the correct helper, nft_(un)register_basechain_hooks can be folded into the only remaining caller. Because nft_trans_table_enable() won't do anything when the DORMANT flag is set, remove the flag first, then re-add it in case re-enablement fails, else this patch breaks sequence: add table ip x { flags dormant; } /* add base chains */ add table ip x The last 'add' will remove the dormant flags, but won't have any other effect -- base chains are not registered. Then, next 'set dormant flag' will create another 'hook not found' splat. Reported-by: syzbot+2570f2c036e3da5db176@syzkaller.appspotmail.com Fixes: 4e25ceb80b58 ("netfilter: nf_tables: allow chain type to override hook register") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 41 ++++++++++++----------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7647ecfa0d4078..88325b264737f3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -188,24 +188,6 @@ static void nft_netdev_unregister_hooks(struct net *net, nf_unregister_net_hook(net, &hook->ops); } -static int nft_register_basechain_hooks(struct net *net, int family, - struct nft_base_chain *basechain) -{ - if (family == NFPROTO_NETDEV) - return nft_netdev_register_hooks(net, &basechain->hook_list); - - return nf_register_net_hook(net, &basechain->ops); -} - -static void nft_unregister_basechain_hooks(struct net *net, int family, - struct nft_base_chain *basechain) -{ - if (family == NFPROTO_NETDEV) - nft_netdev_unregister_hooks(net, &basechain->hook_list); - else - nf_unregister_net_hook(net, &basechain->ops); -} - static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -223,7 +205,10 @@ static int nf_tables_register_hook(struct net *net, if (basechain->type->ops_register) return basechain->type->ops_register(net, ops); - return nft_register_basechain_hooks(net, table->family, basechain); + if (table->family == NFPROTO_NETDEV) + return nft_netdev_register_hooks(net, &basechain->hook_list); + + return nf_register_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, @@ -242,7 +227,10 @@ static void nf_tables_unregister_hook(struct net *net, if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - nft_unregister_basechain_hooks(net, table->family, basechain); + if (table->family == NFPROTO_NETDEV) + nft_netdev_unregister_hooks(net, &basechain->hook_list); + else + nf_unregister_net_hook(net, &basechain->ops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -832,8 +820,7 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) if (cnt && i++ == cnt) break; - nft_unregister_basechain_hooks(net, table->family, - nft_base_chain(chain)); + nf_tables_unregister_hook(net, table, chain); } } @@ -848,8 +835,7 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table) if (!nft_is_base_chain(chain)) continue; - err = nft_register_basechain_hooks(net, table->family, - nft_base_chain(chain)); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err_register_hooks; @@ -894,11 +880,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (ret >= 0) nft_trans_table_enable(trans) = true; - } + else + ctx->table->flags |= NFT_TABLE_F_DORMANT; } if (ret < 0) goto err; From 841eb4012cef84820e5906527b31a854f42b0748 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 14 Jul 2020 15:08:16 +0300 Subject: [PATCH 027/131] dpaa2-eth: check fsl_mc_get_endpoint for IS_ERR_OR_NULL() The fsl_mc_get_endpoint() function can return an error or directly a NULL pointer in case the peer device is not under the root DPRC container. Treat this case also, otherwise it would lead to a NULL pointer when trying to access the peer fsl_mc_device. Fixes: 719479230893 ("dpaa2-eth: add MAC/PHY support through phylink") Signed-off-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index f150cd454fa449..0998ceb1a26ea8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3632,7 +3632,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv) dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent); dpmac_dev = fsl_mc_get_endpoint(dpni_dev); - if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) + if (IS_ERR_OR_NULL(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) return 0; if (dpaa2_mac_is_type_fixed(dpmac_dev, priv->mc_io)) From f961134a612c793d5901a93d85a29337c74af978 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 10 Jul 2020 14:12:43 +0200 Subject: [PATCH 028/131] vsock/virtio: annotate 'the_virtio_vsock' RCU pointer Commit 0deab087b16a ("vsock/virtio: use RCU to avoid use-after-free on the_virtio_vsock") starts to use RCU to protect 'the_virtio_vsock' pointer, but we forgot to annotate it. This patch adds the annotation to fix the following sparse errors: net/vmw_vsock/virtio_transport.c:73:17: error: incompatible types in comparison expression (different address spaces): net/vmw_vsock/virtio_transport.c:73:17: struct virtio_vsock [noderef] __rcu * net/vmw_vsock/virtio_transport.c:73:17: struct virtio_vsock * net/vmw_vsock/virtio_transport.c:171:17: error: incompatible types in comparison expression (different address spaces): net/vmw_vsock/virtio_transport.c:171:17: struct virtio_vsock [noderef] __rcu * net/vmw_vsock/virtio_transport.c:171:17: struct virtio_vsock * net/vmw_vsock/virtio_transport.c:207:17: error: incompatible types in comparison expression (different address spaces): net/vmw_vsock/virtio_transport.c:207:17: struct virtio_vsock [noderef] __rcu * net/vmw_vsock/virtio_transport.c:207:17: struct virtio_vsock * net/vmw_vsock/virtio_transport.c:561:13: error: incompatible types in comparison expression (different address spaces): net/vmw_vsock/virtio_transport.c:561:13: struct virtio_vsock [noderef] __rcu * net/vmw_vsock/virtio_transport.c:561:13: struct virtio_vsock * net/vmw_vsock/virtio_transport.c:612:9: error: incompatible types in comparison expression (different address spaces): net/vmw_vsock/virtio_transport.c:612:9: struct virtio_vsock [noderef] __rcu * net/vmw_vsock/virtio_transport.c:612:9: struct virtio_vsock * net/vmw_vsock/virtio_transport.c:631:9: error: incompatible types in comparison expression (different address spaces): net/vmw_vsock/virtio_transport.c:631:9: struct virtio_vsock [noderef] __rcu * net/vmw_vsock/virtio_transport.c:631:9: struct virtio_vsock * Fixes: 0deab087b16a ("vsock/virtio: use RCU to avoid use-after-free on the_virtio_vsock") Reported-by: Michael S. Tsirkin Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Acked-by: Michael S. Tsirkin Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index dfbaf6bd8b1c75..2700a63ab095e5 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -22,7 +22,7 @@ #include static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; +static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ struct virtio_vsock { From c28d9a285668c799eeae2f7f93e929a6028a4d6d Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Wed, 15 Jul 2020 09:59:31 -0400 Subject: [PATCH 029/131] ax88172a: fix ax88172a_unbind() failures If ax88172a_unbind() fails, make sure that the return code is less than zero so that cleanup is done properly and avoid UAF. Fixes: a9a51bd727d1 ("ax88172a: fix information leak on short answers") Signed-off-by: George Kennedy Reported-by: syzbot+4cd84f527bf4a10fc9c1@syzkaller.appspotmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/ax88172a.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 4e514f5d7c6c7c..fd3a04d98dc14b 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -187,6 +187,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); if (ret < ETH_ALEN) { netdev_err(dev->net, "Failed to read MAC address: %d\n", ret); + ret = -EIO; goto free; } memcpy(dev->net->dev_addr, buf, ETH_ALEN); From 340746398b67e3ce5019698748ebaa7adf048114 Mon Sep 17 00:00:00 2001 From: Sergey Organov Date: Tue, 14 Jul 2020 19:28:02 +0300 Subject: [PATCH 030/131] net: fec: fix hardware time stamping by external devices Fix support for external PTP-aware devices such as DSA or PTP PHY: Make sure we never time stamp tx packets when hardware time stamping is disabled. Check for PTP PHY being in use and then pass ioctls related to time stamping of Ethernet packets to the PTP PHY rather than handle them ourselves. In addition, disable our own hardware time stamping in this case. Fixes: 6605b730c061 ("FEC: Add time stamping code and a PTP hardware clock") Signed-off-by: Sergey Organov Acked-by: Richard Cochran Acked-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec.h | 1 + drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++++++++++------ drivers/net/ethernet/freescale/fec_ptp.c | 12 ++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index d8d76da51c5e9a..832a2175636d69 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -590,6 +590,7 @@ struct fec_enet_private { void fec_ptp_init(struct platform_device *pdev, int irq_idx); void fec_ptp_stop(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); +void fec_ptp_disable_hwts(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3982285ed020e2..cc7fbfc093548e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1294,8 +1294,13 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) ndev->stats.tx_bytes += skb->len; } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) && - fep->bufdesc_ex) { + /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who + * are to time stamp the packet, so we still need to check time + * stamping enabled flag. + */ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && + fep->hwts_tx_en) && + fep->bufdesc_ex) { struct skb_shared_hwtstamps shhwtstamps; struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; @@ -2723,10 +2728,16 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) return -ENODEV; if (fep->bufdesc_ex) { - if (cmd == SIOCSHWTSTAMP) - return fec_ptp_set(ndev, rq); - if (cmd == SIOCGHWTSTAMP) - return fec_ptp_get(ndev, rq); + bool use_fec_hwts = !phy_has_hwtstamp(phydev); + + if (cmd == SIOCSHWTSTAMP) { + if (use_fec_hwts) + return fec_ptp_set(ndev, rq); + fec_ptp_disable_hwts(ndev); + } else if (cmd == SIOCGHWTSTAMP) { + if (use_fec_hwts) + return fec_ptp_get(ndev, rq); + } } return phy_mii_ioctl(phydev, rq, cmd); diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 945643c026155f..f8a592c96beb06 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -452,6 +452,18 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp, return -EOPNOTSUPP; } +/** + * fec_ptp_disable_hwts - disable hardware time stamping + * @ndev: pointer to net_device + */ +void fec_ptp_disable_hwts(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + fep->hwts_tx_en = 0; + fep->hwts_rx_en = 0; +} + int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr) { struct fec_enet_private *fep = netdev_priv(ndev); From 473309fb8372365ad211f425bca760af800e10a7 Mon Sep 17 00:00:00 2001 From: Sergey Organov Date: Wed, 15 Jul 2020 19:10:00 +0300 Subject: [PATCH 031/131] net: dp83640: fix SIOCSHWTSTAMP to update the struct with actual configuration From Documentation/networking/timestamping.txt: A driver which supports hardware time stamping shall update the struct with the actual, possibly more permissive configuration. Do update the struct passed when we upscale the requested time stamping mode. Fixes: cb646e2b02b2 ("ptp: Added a clock driver for the National Semiconductor PHYTER.") Signed-off-by: Sergey Organov Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83640.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index ecbd5e0d685cf3..acb0aae607558f 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1260,6 +1260,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4; dp83640->version = PTP_CLASS_V1; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: @@ -1267,6 +1268,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4; dp83640->version = PTP_CLASS_V2; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: @@ -1274,6 +1276,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L2; dp83640->version = PTP_CLASS_V2; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: @@ -1281,6 +1284,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4 | PTP_CLASS_L2; dp83640->version = PTP_CLASS_V2; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; default: return -ERANGE; From 66673f96f0f968b991dc38be06102246919c663c Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Fri, 17 Jul 2020 17:01:21 +0800 Subject: [PATCH 032/131] ieee802154: fix one possible memleak in adf7242_probe When probe fail, we should destroy the workqueue. Fixes: 2795e8c25161 ("net: ieee802154: fix a potential NULL pointer dereference") Signed-off-by: Liu Jian Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20200717090121.2143-1-liujian56@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 5a37514e423477..8dbccec6ac8669 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1262,7 +1262,7 @@ static int adf7242_probe(struct spi_device *spi) WQ_MEM_RECLAIM); if (unlikely(!lp->wqueue)) { ret = -ENOMEM; - goto err_hw_init; + goto err_alloc_wq; } ret = adf7242_hw_init(lp); @@ -1294,6 +1294,8 @@ static int adf7242_probe(struct spi_device *spi) return ret; err_hw_init: + destroy_workqueue(lp->wqueue); +err_alloc_wq: mutex_destroy(&lp->bmux); ieee802154_free_hw(lp->hw); From 56a1c778c7663a3068a59b4cb8c8fd27506b3eca Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 14 Jul 2020 13:15:20 -0400 Subject: [PATCH 033/131] docs: ptp.rst: add support for Renesas (IDT) ClockMatrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add below to “Ancillary clock features” section - Low Pass Filter (LPF) access from user space Add below to list of “Supported hardware” section + Renesas (IDT) ClockMatrix™ Signed-off-by: Min Li Signed-off-by: David S. Miller --- Documentation/driver-api/ptp.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/driver-api/ptp.rst b/Documentation/driver-api/ptp.rst index a15192e323478b..664838ae777601 100644 --- a/Documentation/driver-api/ptp.rst +++ b/Documentation/driver-api/ptp.rst @@ -23,6 +23,7 @@ PTP hardware clock infrastructure for Linux + Ancillary clock features - Time stamp external events - Period output signals configurable from user space + - Low Pass Filter (LPF) access from user space - Synchronization of the Linux system time via the PPS subsystem PTP hardware clock kernel API @@ -94,3 +95,14 @@ Supported hardware - Auxiliary Slave/Master Mode Snapshot (optional interrupt) - Target Time (optional interrupt) + + * Renesas (IDT) ClockMatrix™ + + - Up to 4 independent PHC channels + - Integrated low pass filter (LPF), access via .adjPhase (compliant to ITU-T G.8273.2) + - Programmable output periodic signals + - Programmable inputs can time stamp external triggers + - Driver and/or hardware configuration through firmware (idtcm.bin) + - LPF settings (bandwidth, phase limiting, automatic holdover, physical layer assist (per ITU-T G.8273.2)) + - Programmable output PTP clocks, any frequency up to 1GHz (to other PHY/MAC time stampers, refclk to ASSPs/SoCs/FPGAs) + - Lock to GNSS input, automatic switching between GNSS and user-space PHC control (optional) From cebb69754f37d68e1355a5e726fdac317bcda302 Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Wed, 15 Jul 2020 20:58:10 +0800 Subject: [PATCH 034/131] rtnetlink: Fix memory(net_device) leak when ->newlink fails When vlan_newlink call register_vlan_dev fails, it might return error with dev->reg_state = NETREG_UNREGISTERED. The rtnl_newlink should free the memory. But currently rtnl_newlink only free the memory which state is NETREG_UNINITIALIZED. BUG: memory leak unreferenced object 0xffff8881051de000 (size 4096): comm "syz-executor139", pid 560, jiffies 4294745346 (age 32.445s) hex dump (first 32 bytes): 76 6c 61 6e 32 00 00 00 00 00 00 00 00 00 00 00 vlan2........... 00 45 28 03 81 88 ff ff 00 00 00 00 00 00 00 00 .E(............. backtrace: [<0000000047527e31>] kmalloc_node include/linux/slab.h:578 [inline] [<0000000047527e31>] kvmalloc_node+0x33/0xd0 mm/util.c:574 [<000000002b59e3bc>] kvmalloc include/linux/mm.h:753 [inline] [<000000002b59e3bc>] kvzalloc include/linux/mm.h:761 [inline] [<000000002b59e3bc>] alloc_netdev_mqs+0x83/0xd90 net/core/dev.c:9929 [<000000006076752a>] rtnl_create_link+0x2c0/0xa20 net/core/rtnetlink.c:3067 [<00000000572b3be5>] __rtnl_newlink+0xc9c/0x1330 net/core/rtnetlink.c:3329 [<00000000e84ea553>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3397 [<0000000052c7c0a9>] rtnetlink_rcv_msg+0x540/0x990 net/core/rtnetlink.c:5460 [<000000004b5cb379>] netlink_rcv_skb+0x12b/0x3a0 net/netlink/af_netlink.c:2469 [<00000000c71c20d3>] netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] [<00000000c71c20d3>] netlink_unicast+0x4c6/0x690 net/netlink/af_netlink.c:1329 [<00000000cca72fa9>] netlink_sendmsg+0x735/0xcc0 net/netlink/af_netlink.c:1918 [<000000009221ebf7>] sock_sendmsg_nosec net/socket.c:652 [inline] [<000000009221ebf7>] sock_sendmsg+0x109/0x140 net/socket.c:672 [<000000001c30ffe4>] ____sys_sendmsg+0x5f5/0x780 net/socket.c:2352 [<00000000b71ca6f3>] ___sys_sendmsg+0x11d/0x1a0 net/socket.c:2406 [<0000000007297384>] __sys_sendmsg+0xeb/0x1b0 net/socket.c:2439 [<000000000eb29b11>] do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 [<000000006839b4d0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: cb626bf566eb ("net-sysfs: Fix reference count leak") Reported-by: Hulk Robot Signed-off-by: Weilong Chen Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9aedc15736adfb..85a4b0101f7618 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3343,7 +3343,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, */ if (err < 0) { /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED) + if (dev->reg_state == NETREG_UNINITIALIZED || + dev->reg_state == NETREG_UNREGISTERED) free_netdev(dev); goto out; } From bca9749b1aa23d964d3ab930938af66dbf887f15 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 16 Jul 2020 11:50:38 +0800 Subject: [PATCH 035/131] net: smc91x: Fix possible memory leak in smc_drv_probe() If try_toggle_control_gpio() failed in smc_drv_probe(), free_netdev(ndev) should be called to free the ndev created earlier. Otherwise, a memleak will occur. Fixes: 7d2911c43815 ("net: smc91x: Fix gpios for device tree based booting") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 90410f9d3b1aae..1c4fea9c3ec4c8 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2274,7 +2274,7 @@ static int smc_drv_probe(struct platform_device *pdev) ret = try_toggle_control_gpio(&pdev->dev, &lp->power_gpio, "power", 0, 0, 100); if (ret) - return ret; + goto out_free_netdev; /* * Optional reset GPIO configured? Minimum 100 ns reset needed @@ -2283,7 +2283,7 @@ static int smc_drv_probe(struct platform_device *pdev) ret = try_toggle_control_gpio(&pdev->dev, &lp->reset_gpio, "reset", 0, 0, 100); if (ret) - return ret; + goto out_free_netdev; /* * Need to wait for optional EEPROM to load, max 750 us according From aba69d49fb49c9166596dd78926514173b7f9ab5 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Thu, 16 Jul 2020 17:51:14 +0200 Subject: [PATCH 036/131] selftests: net: ip_defrag: modprobe missing nf_defrag_ipv6 support Fix ip_defrag.sh when CONFIG_NF_DEFRAG_IPV6=m: $ sudo ./ip_defrag.sh + set -e + mktemp -u XXXXXX + readonly NETNS=ns-rGlXcw + trap cleanup EXIT + setup + ip netns add ns-rGlXcw + ip -netns ns-rGlXcw link set lo up + ip netns exec ns-rGlXcw sysctl -w net.ipv4.ipfrag_high_thresh=9000000 + ip netns exec ns-rGlXcw sysctl -w net.ipv4.ipfrag_low_thresh=7000000 + ip netns exec ns-rGlXcw sysctl -w net.ipv4.ipfrag_time=1 + ip netns exec ns-rGlXcw sysctl -w net.ipv6.ip6frag_high_thresh=9000000 + ip netns exec ns-rGlXcw sysctl -w net.ipv6.ip6frag_low_thresh=7000000 + ip netns exec ns-rGlXcw sysctl -w net.ipv6.ip6frag_time=1 + ip netns exec ns-rGlXcw sysctl -w net.netfilter.nf_conntrack_frag6_high_thresh=9000000 + cleanup + ip netns del ns-rGlXcw $ ls -la /proc/sys/net/netfilter/nf_conntrack_frag6_high_thresh ls: cannot access '/proc/sys/net/netfilter/nf_conntrack_frag6_high_thresh': No such file or directory $ sudo modprobe nf_defrag_ipv6 $ ls -la /proc/sys/net/netfilter/nf_conntrack_frag6_high_thresh -rw-r--r-- 1 root root 0 Jul 14 12:34 /proc/sys/net/netfilter/nf_conntrack_frag6_high_thresh Signed-off-by: Paolo Pisati Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/net/ip_defrag.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh index 15d3489ecd9cee..ceb7ad4dbd945f 100755 --- a/tools/testing/selftests/net/ip_defrag.sh +++ b/tools/testing/selftests/net/ip_defrag.sh @@ -6,6 +6,8 @@ set +x set -e +modprobe -q nf_defrag_ipv6 + readonly NETNS="ns-$(mktemp -u XXXXXX)" setup() { From 2f11f0df8474b2206c8cf2d5d5b98e7eff240cdf Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 16 Jul 2020 16:38:15 -0700 Subject: [PATCH 037/131] net: bcmgenet: test MPD_EN when resuming When the GENET driver resumes from deep sleep the UMAC_CMD register may not be accessible and therefore should not be accessed from bcmgenet_wol_power_up_cfg() if the GENET has been reset. This commit adds a check of the MPD_EN flag when Wake on Magic Packet is enabled. A clear flag indicates that the GENET hardware must have been reset so the remainder of the hardware programming is bypassed. Fixes: 1a1d5106c1e3 ("net: bcmgenet: move clk_wol management to bcmgenet_wol") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 4ea6a26b04f701..def990dbf34fee 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -217,11 +217,16 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, priv->wol_active = 0; clk_disable_unprepare(priv->clk_wol); + priv->crc_fwd_en = 0; /* Disable Magic Packet Detection */ - reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL); - reg &= ~(MPD_EN | MPD_PW_EN); - bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL); + if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { + reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL); + if (!(reg & MPD_EN)) + return; /* already reset so skip the rest */ + reg &= ~(MPD_EN | MPD_PW_EN); + bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL); + } /* Disable WAKE_FILTER Detection */ reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); @@ -232,5 +237,4 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~CMD_CRC_FWD; bcmgenet_umac_writel(priv, reg, UMAC_CMD); - priv->crc_fwd_en = 0; } From 3d653adb4b4955addad8c3accd33e22cb99a445b Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 16 Jul 2020 16:38:16 -0700 Subject: [PATCH 038/131] net: bcmgenet: test RBUF_ACPI_EN when resuming When the GENET driver resumes from deep sleep the UMAC_CMD register may not be accessible and therefore should not be accessed from bcmgenet_wol_power_up_cfg() if the GENET has been reset. This commit adds a check of the RBUF_ACPI_EN flag when Wake on Filter is enabled. A clear flag indicates that the GENET hardware must have been reset so the remainder of the hardware programming is bypassed. Fixes: f50932cca632 ("net: bcmgenet: add WAKE_FILTER support") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index def990dbf34fee..1c86eddb1b510c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -229,9 +229,13 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, } /* Disable WAKE_FILTER Detection */ - reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); - reg &= ~(RBUF_HFB_EN | RBUF_ACPI_EN); - bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); + if (priv->wolopts & WAKE_FILTER) { + reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); + if (!(reg & RBUF_ACPI_EN)) + return; /* already reset so skip the rest */ + reg &= ~(RBUF_HFB_EN | RBUF_ACPI_EN); + bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL); + } /* Disable CRC Forward */ reg = bcmgenet_umac_readl(priv, UMAC_CMD); From a8c64542b478e61fa17661803b590ed276205914 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 16 Jul 2020 16:38:17 -0700 Subject: [PATCH 039/131] net: bcmgenet: restore HFB filters on resume The Hardware Filter Block RAM may not be preserved when the GENET block is reset during a deep sleep, so it is not sufficient to only backup and restore the enables. This commit clears out the HFB block and reprograms the rxnfc rules when the system resumes from a suspended state. To support this the bcmgenet_hfb_create_rxnfc_filter() function is modified to access the register space directly so that it can't fail due to memory allocation issues. Fixes: f50932cca632 ("net: bcmgenet: add WAKE_FILTER support") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 138 ++++++++---------- .../net/ethernet/broadcom/genet/bcmgenet.h | 1 - 2 files changed, 62 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index af924a8b885fda..368e05b16ae9e9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -543,14 +543,14 @@ static int bcmgenet_hfb_validate_mask(void *mask, size_t size) #define VALIDATE_MASK(x) \ bcmgenet_hfb_validate_mask(&(x), sizeof(x)) -static int bcmgenet_hfb_insert_data(u32 *f, int offset, - void *val, void *mask, size_t size) +static int bcmgenet_hfb_insert_data(struct bcmgenet_priv *priv, u32 f_index, + u32 offset, void *val, void *mask, + size_t size) { - int index; - u32 tmp; + u32 index, tmp; - index = offset / 2; - tmp = f[index]; + index = f_index * priv->hw_params->hfb_filter_size + offset / 2; + tmp = bcmgenet_hfb_readl(priv, index * sizeof(u32)); while (size--) { if (offset++ & 1) { @@ -567,9 +567,10 @@ static int bcmgenet_hfb_insert_data(u32 *f, int offset, tmp |= 0x10000; break; } - f[index++] = tmp; + bcmgenet_hfb_writel(priv, tmp, index++ * sizeof(u32)); if (size) - tmp = f[index]; + tmp = bcmgenet_hfb_readl(priv, + index * sizeof(u32)); } else { tmp &= ~0xCFF00; tmp |= (*(unsigned char *)val++) << 8; @@ -585,44 +586,26 @@ static int bcmgenet_hfb_insert_data(u32 *f, int offset, break; } if (!size) - f[index] = tmp; + bcmgenet_hfb_writel(priv, tmp, index * sizeof(u32)); } } return 0; } -static void bcmgenet_hfb_set_filter(struct bcmgenet_priv *priv, u32 *f_data, - u32 f_length, u32 rx_queue, int f_index) -{ - u32 base = f_index * priv->hw_params->hfb_filter_size; - int i; - - for (i = 0; i < f_length; i++) - bcmgenet_hfb_writel(priv, f_data[i], (base + i) * sizeof(u32)); - - bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length); - bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue); -} - -static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, - struct bcmgenet_rxnfc_rule *rule) +static void bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, + struct bcmgenet_rxnfc_rule *rule) { struct ethtool_rx_flow_spec *fs = &rule->fs; - int err = 0, offset = 0, f_length = 0; + u32 offset = 0, f_length = 0, f; u8 val_8, mask_8; __be16 val_16; u16 mask_16; size_t size; - u32 *f_data; - - f_data = kcalloc(priv->hw_params->hfb_filter_size, sizeof(u32), - GFP_KERNEL); - if (!f_data) - return -ENOMEM; + f = fs->location; if (fs->flow_type & FLOW_MAC_EXT) { - bcmgenet_hfb_insert_data(f_data, 0, + bcmgenet_hfb_insert_data(priv, f, 0, &fs->h_ext.h_dest, &fs->m_ext.h_dest, sizeof(fs->h_ext.h_dest)); } @@ -630,11 +613,11 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, if (fs->flow_type & FLOW_EXT) { if (fs->m_ext.vlan_etype || fs->m_ext.vlan_tci) { - bcmgenet_hfb_insert_data(f_data, 12, + bcmgenet_hfb_insert_data(priv, f, 12, &fs->h_ext.vlan_etype, &fs->m_ext.vlan_etype, sizeof(fs->h_ext.vlan_etype)); - bcmgenet_hfb_insert_data(f_data, 14, + bcmgenet_hfb_insert_data(priv, f, 14, &fs->h_ext.vlan_tci, &fs->m_ext.vlan_tci, sizeof(fs->h_ext.vlan_tci)); @@ -646,15 +629,15 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case ETHER_FLOW: f_length += DIV_ROUND_UP(ETH_HLEN, 2); - bcmgenet_hfb_insert_data(f_data, 0, + bcmgenet_hfb_insert_data(priv, f, 0, &fs->h_u.ether_spec.h_dest, &fs->m_u.ether_spec.h_dest, sizeof(fs->h_u.ether_spec.h_dest)); - bcmgenet_hfb_insert_data(f_data, ETH_ALEN, + bcmgenet_hfb_insert_data(priv, f, ETH_ALEN, &fs->h_u.ether_spec.h_source, &fs->m_u.ether_spec.h_source, sizeof(fs->h_u.ether_spec.h_source)); - bcmgenet_hfb_insert_data(f_data, (2 * ETH_ALEN) + offset, + bcmgenet_hfb_insert_data(priv, f, (2 * ETH_ALEN) + offset, &fs->h_u.ether_spec.h_proto, &fs->m_u.ether_spec.h_proto, sizeof(fs->h_u.ether_spec.h_proto)); @@ -664,21 +647,21 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, /* Specify IP Ether Type */ val_16 = htons(ETH_P_IP); mask_16 = 0xFFFF; - bcmgenet_hfb_insert_data(f_data, (2 * ETH_ALEN) + offset, + bcmgenet_hfb_insert_data(priv, f, (2 * ETH_ALEN) + offset, &val_16, &mask_16, sizeof(val_16)); - bcmgenet_hfb_insert_data(f_data, 15 + offset, + bcmgenet_hfb_insert_data(priv, f, 15 + offset, &fs->h_u.usr_ip4_spec.tos, &fs->m_u.usr_ip4_spec.tos, sizeof(fs->h_u.usr_ip4_spec.tos)); - bcmgenet_hfb_insert_data(f_data, 23 + offset, + bcmgenet_hfb_insert_data(priv, f, 23 + offset, &fs->h_u.usr_ip4_spec.proto, &fs->m_u.usr_ip4_spec.proto, sizeof(fs->h_u.usr_ip4_spec.proto)); - bcmgenet_hfb_insert_data(f_data, 26 + offset, + bcmgenet_hfb_insert_data(priv, f, 26 + offset, &fs->h_u.usr_ip4_spec.ip4src, &fs->m_u.usr_ip4_spec.ip4src, sizeof(fs->h_u.usr_ip4_spec.ip4src)); - bcmgenet_hfb_insert_data(f_data, 30 + offset, + bcmgenet_hfb_insert_data(priv, f, 30 + offset, &fs->h_u.usr_ip4_spec.ip4dst, &fs->m_u.usr_ip4_spec.ip4dst, sizeof(fs->h_u.usr_ip4_spec.ip4dst)); @@ -688,11 +671,11 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, /* Only supports 20 byte IPv4 header */ val_8 = 0x45; mask_8 = 0xFF; - bcmgenet_hfb_insert_data(f_data, ETH_HLEN + offset, + bcmgenet_hfb_insert_data(priv, f, ETH_HLEN + offset, &val_8, &mask_8, sizeof(val_8)); size = sizeof(fs->h_u.usr_ip4_spec.l4_4_bytes); - bcmgenet_hfb_insert_data(f_data, + bcmgenet_hfb_insert_data(priv, f, ETH_HLEN + 20 + offset, &fs->h_u.usr_ip4_spec.l4_4_bytes, &fs->m_u.usr_ip4_spec.l4_4_bytes, @@ -701,34 +684,42 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv, break; } + bcmgenet_hfb_set_filter_length(priv, f, 2 * f_length); if (!fs->ring_cookie || fs->ring_cookie == RX_CLS_FLOW_WAKE) { /* Ring 0 flows can be handled by the default Descriptor Ring * We'll map them to ring 0, but don't enable the filter */ - bcmgenet_hfb_set_filter(priv, f_data, f_length, 0, - fs->location); + bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f, 0); rule->state = BCMGENET_RXNFC_STATE_DISABLED; } else { /* Other Rx rings are direct mapped here */ - bcmgenet_hfb_set_filter(priv, f_data, f_length, - fs->ring_cookie, fs->location); - bcmgenet_hfb_enable_filter(priv, fs->location); + bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f, + fs->ring_cookie); + bcmgenet_hfb_enable_filter(priv, f); rule->state = BCMGENET_RXNFC_STATE_ENABLED; } - - kfree(f_data); - - return err; } /* bcmgenet_hfb_clear * * Clear Hardware Filter Block and disable all filtering. */ +static void bcmgenet_hfb_clear_filter(struct bcmgenet_priv *priv, u32 f_index) +{ + u32 base, i; + + base = f_index * priv->hw_params->hfb_filter_size; + for (i = 0; i < priv->hw_params->hfb_filter_size; i++) + bcmgenet_hfb_writel(priv, 0x0, (base + i) * sizeof(u32)); +} + static void bcmgenet_hfb_clear(struct bcmgenet_priv *priv) { u32 i; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) + return; + bcmgenet_hfb_reg_writel(priv, 0x0, HFB_CTRL); bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS); bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS + 4); @@ -740,19 +731,18 @@ static void bcmgenet_hfb_clear(struct bcmgenet_priv *priv) bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_LEN_V3PLUS + i * sizeof(u32)); - for (i = 0; i < priv->hw_params->hfb_filter_cnt * - priv->hw_params->hfb_filter_size; i++) - bcmgenet_hfb_writel(priv, 0x0, i * sizeof(u32)); + for (i = 0; i < priv->hw_params->hfb_filter_cnt; i++) + bcmgenet_hfb_clear_filter(priv, i); } static void bcmgenet_hfb_init(struct bcmgenet_priv *priv) { int i; + INIT_LIST_HEAD(&priv->rxnfc_list); if (GENET_IS_V1(priv) || GENET_IS_V2(priv)) return; - INIT_LIST_HEAD(&priv->rxnfc_list); for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) { INIT_LIST_HEAD(&priv->rxnfc_rules[i].list); priv->rxnfc_rules[i].state = BCMGENET_RXNFC_STATE_UNUSED; @@ -1437,18 +1427,15 @@ static int bcmgenet_insert_flow(struct net_device *dev, loc_rule = &priv->rxnfc_rules[cmd->fs.location]; if (loc_rule->state == BCMGENET_RXNFC_STATE_ENABLED) bcmgenet_hfb_disable_filter(priv, cmd->fs.location); - if (loc_rule->state != BCMGENET_RXNFC_STATE_UNUSED) + if (loc_rule->state != BCMGENET_RXNFC_STATE_UNUSED) { list_del(&loc_rule->list); + bcmgenet_hfb_clear_filter(priv, cmd->fs.location); + } loc_rule->state = BCMGENET_RXNFC_STATE_UNUSED; memcpy(&loc_rule->fs, &cmd->fs, sizeof(struct ethtool_rx_flow_spec)); - err = bcmgenet_hfb_create_rxnfc_filter(priv, loc_rule); - if (err) { - netdev_err(dev, "rxnfc: Could not install rule (%d)\n", - err); - return err; - } + bcmgenet_hfb_create_rxnfc_filter(priv, loc_rule); list_add_tail(&loc_rule->list, &priv->rxnfc_list); @@ -1473,8 +1460,10 @@ static int bcmgenet_delete_flow(struct net_device *dev, if (rule->state == BCMGENET_RXNFC_STATE_ENABLED) bcmgenet_hfb_disable_filter(priv, cmd->fs.location); - if (rule->state != BCMGENET_RXNFC_STATE_UNUSED) + if (rule->state != BCMGENET_RXNFC_STATE_UNUSED) { list_del(&rule->list); + bcmgenet_hfb_clear_filter(priv, cmd->fs.location); + } rule->state = BCMGENET_RXNFC_STATE_UNUSED; memset(&rule->fs, 0, sizeof(struct ethtool_rx_flow_spec)); @@ -4129,8 +4118,9 @@ static int bcmgenet_resume(struct device *d) { struct net_device *dev = dev_get_drvdata(d); struct bcmgenet_priv *priv = netdev_priv(dev); + struct bcmgenet_rxnfc_rule *rule; unsigned long dma_ctrl; - u32 offset, reg; + u32 reg; int ret; if (!netif_running(dev)) @@ -4161,10 +4151,11 @@ static int bcmgenet_resume(struct device *d) bcmgenet_set_hw_addr(priv, dev->dev_addr); - offset = HFB_FLT_ENABLE_V3PLUS; - bcmgenet_hfb_reg_writel(priv, priv->hfb_en[1], offset); - bcmgenet_hfb_reg_writel(priv, priv->hfb_en[2], offset + sizeof(u32)); - bcmgenet_hfb_reg_writel(priv, priv->hfb_en[0], HFB_CTRL); + /* Restore hardware filters */ + bcmgenet_hfb_clear(priv); + list_for_each_entry(rule, &priv->rxnfc_list, list) + if (rule->state != BCMGENET_RXNFC_STATE_UNUSED) + bcmgenet_hfb_create_rxnfc_filter(priv, rule); if (priv->internal_phy) { reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); @@ -4208,7 +4199,6 @@ static int bcmgenet_suspend(struct device *d) { struct net_device *dev = dev_get_drvdata(d); struct bcmgenet_priv *priv = netdev_priv(dev); - u32 offset; if (!netif_running(dev)) return 0; @@ -4220,11 +4210,7 @@ static int bcmgenet_suspend(struct device *d) if (!device_may_wakeup(d)) phy_suspend(dev->phydev); - /* Preserve filter state and disable filtering */ - priv->hfb_en[0] = bcmgenet_hfb_reg_readl(priv, HFB_CTRL); - offset = HFB_FLT_ENABLE_V3PLUS; - priv->hfb_en[1] = bcmgenet_hfb_reg_readl(priv, offset); - priv->hfb_en[2] = bcmgenet_hfb_reg_readl(priv, offset + sizeof(u32)); + /* Disable filtering */ bcmgenet_hfb_reg_writel(priv, 0, HFB_CTRL); return 0; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index a12cb59298f435..f6ca01da141d4b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -696,7 +696,6 @@ struct bcmgenet_priv { u32 wolopts; u8 sopass[SOPASS_MAX]; bool wol_active; - u32 hfb_en[3]; struct bcmgenet_mib_counters mib; From 0b4a66a389d1ff5dab29f688fcfe36482bc889a2 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 17 Jul 2020 15:10:16 +0800 Subject: [PATCH 040/131] nfc: nci: add missed destroy_workqueue in nci_register_device When nfc_register_device fails in nci_register_device, destroy_workqueue() shouled be called to destroy ndev->tx_wq. Fixes: 3c1c0f5dc80b ("NFC: NCI: Fix nci_register_device init sequence") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- net/nfc/nci/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7cd5248843041a..78ea8c94dcba05 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1228,10 +1228,13 @@ int nci_register_device(struct nci_dev *ndev) rc = nfc_register_device(ndev->nfc_dev); if (rc) - goto destroy_rx_wq_exit; + goto destroy_tx_wq_exit; goto exit; +destroy_tx_wq_exit: + destroy_workqueue(ndev->tx_wq); + destroy_rx_wq_exit: destroy_workqueue(ndev->rx_wq); From 23e500e88723f243c27e1b26c9d035d4cdd1b24a Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Fri, 17 Jul 2020 23:39:49 +0300 Subject: [PATCH 041/131] net: atlantic: disable PTP on AQC111, AQC112 This patch disables PTP on AQC111 and AQC112 due to a known HW issue, which can cause datapath issues. Ideally PTP block should have been disabled via PHY provisioning, but unfortunately many units have been shipped with enabled PTP block. Thus, we have to work around this in the driver. Fixes: dbcd6806af420 ("net: aquantia: add support for Phy access") Signed-off-by: Nikita Danilov Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_hw.h | 1 + .../net/ethernet/aquantia/atlantic/aq_nic.c | 9 ++++++ .../net/ethernet/aquantia/atlantic/aq_nic.h | 2 ++ .../net/ethernet/aquantia/atlantic/aq_phy.c | 29 +++++++++++++++++-- .../net/ethernet/aquantia/atlantic/aq_phy.h | 8 +++-- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 19 ++++++++++++ .../aquantia/atlantic/hw_atl/hw_atl_b0.h | 10 +++---- 7 files changed, 68 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index ed5b465bc6640d..992fedbe4ce3c2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -64,6 +64,7 @@ struct aq_hw_caps_s { u8 rx_rings; bool flow_control; bool is_64_dma; + u32 quirks; u32 priv_data_len; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 4435c6374f7e05..7c7bf6bf163f61 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -415,6 +415,15 @@ int aq_nic_init(struct aq_nic_s *self) self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_TP) { self->aq_hw->phy_id = HW_ATL_PHY_ID_MAX; err = aq_phy_init(self->aq_hw); + + /* Disable the PTP on NICs where it's known to cause datapath + * problems. + * Ideally this should have been done by PHY provisioning, but + * many units have been shipped with enabled PTP block already. + */ + if (self->aq_nic_cfg.aq_hw_caps->quirks & AQ_NIC_QUIRK_BAD_PTP) + if (self->aq_hw->phy_id != HW_ATL_PHY_ID_MAX) + aq_phy_disable_ptp(self->aq_hw); } for (i = 0U; i < self->aq_vecs; i++) { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 2ab003065e6245..439ce9692dac2e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -81,6 +81,8 @@ struct aq_nic_cfg_s { #define AQ_NIC_FLAG_ERR_UNPLUG 0x40000000U #define AQ_NIC_FLAG_ERR_HW 0x80000000U +#define AQ_NIC_QUIRK_BAD_PTP BIT(0) + #define AQ_NIC_WOL_MODES (WAKE_MAGIC |\ WAKE_PHY) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_phy.c b/drivers/net/ethernet/aquantia/atlantic/aq_phy.c index 51ae921e3e1f4a..949ac23517017d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_phy.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_phy.c @@ -1,10 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-only -/* aQuantia Corporation Network Driver - * Copyright (C) 2018-2019 aQuantia Corporation. All rights reserved +/* Atlantic Network Driver + * + * Copyright (C) 2018-2019 aQuantia Corporation + * Copyright (C) 2019-2020 Marvell International Ltd. */ #include "aq_phy.h" +#define HW_ATL_PTP_DISABLE_MSK BIT(10) + bool aq_mdio_busy_wait(struct aq_hw_s *aq_hw) { int err = 0; @@ -145,3 +149,24 @@ bool aq_phy_init(struct aq_hw_s *aq_hw) return true; } + +void aq_phy_disable_ptp(struct aq_hw_s *aq_hw) +{ + static const u16 ptp_registers[] = { + 0x031e, + 0x031d, + 0x031c, + 0x031b, + }; + u16 val; + int i; + + for (i = 0; i < ARRAY_SIZE(ptp_registers); i++) { + val = aq_phy_read_reg(aq_hw, MDIO_MMD_VEND1, + ptp_registers[i]); + + aq_phy_write_reg(aq_hw, MDIO_MMD_VEND1, + ptp_registers[i], + val & ~HW_ATL_PTP_DISABLE_MSK); + } +} diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_phy.h b/drivers/net/ethernet/aquantia/atlantic/aq_phy.h index 84b72ad04a4ab8..86cc1ee836e2de 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_phy.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_phy.h @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* aQuantia Corporation Network Driver - * Copyright (C) 2018-2019 aQuantia Corporation. All rights reserved +/* Atlantic Network Driver + * + * Copyright (C) 2018-2019 aQuantia Corporation + * Copyright (C) 2019-2020 Marvell International Ltd. */ #ifndef AQ_PHY_H @@ -29,4 +31,6 @@ bool aq_phy_init_phy_id(struct aq_hw_s *aq_hw); bool aq_phy_init(struct aq_hw_s *aq_hw); +void aq_phy_disable_ptp(struct aq_hw_s *aq_hw); + #endif /* AQ_PHY_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 14d79f70cad779..d2bc6b289a5458 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -93,6 +93,25 @@ const struct aq_hw_caps_s hw_atl_b0_caps_aqc109 = { AQ_NIC_RATE_100M, }; +const struct aq_hw_caps_s hw_atl_b0_caps_aqc111 = { + DEFAULT_B0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_5G | + AQ_NIC_RATE_2G5 | + AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M, + .quirks = AQ_NIC_QUIRK_BAD_PTP, +}; + +const struct aq_hw_caps_s hw_atl_b0_caps_aqc112 = { + DEFAULT_B0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_2G5 | + AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M, + .quirks = AQ_NIC_QUIRK_BAD_PTP, +}; + static int hw_atl_b0_hw_reset(struct aq_hw_s *self) { int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h index 30f468f2084d5f..16091af1798048 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h @@ -18,17 +18,15 @@ extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc100; extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc107; extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc108; extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc109; - -#define hw_atl_b0_caps_aqc111 hw_atl_b0_caps_aqc108 -#define hw_atl_b0_caps_aqc112 hw_atl_b0_caps_aqc109 +extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc111; +extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc112; #define hw_atl_b0_caps_aqc100s hw_atl_b0_caps_aqc100 #define hw_atl_b0_caps_aqc107s hw_atl_b0_caps_aqc107 #define hw_atl_b0_caps_aqc108s hw_atl_b0_caps_aqc108 #define hw_atl_b0_caps_aqc109s hw_atl_b0_caps_aqc109 - -#define hw_atl_b0_caps_aqc111s hw_atl_b0_caps_aqc108 -#define hw_atl_b0_caps_aqc112s hw_atl_b0_caps_aqc109 +#define hw_atl_b0_caps_aqc111s hw_atl_b0_caps_aqc111 +#define hw_atl_b0_caps_aqc112s hw_atl_b0_caps_aqc112 extern const struct aq_hw_ops hw_atl_ops_b0; From 2ccb0161a0e9eb06f538557d38987e436fc39b8d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sat, 18 Jul 2020 01:32:21 +0200 Subject: [PATCH 042/131] net: macb: use phy_interface_mode_is_rgmii everywhere There is one RGMII check not using the phy_interface_mode_is_rgmii() helper. This prevents the driver from configuring the MAC properly when using a phy-mode that is not just rgmii, e.g. rgmii-rxid. This became an issue on sama5d3 xplained since the ksz9031 driver is hadling phy-mode properly and the phy-mode has to be set to rgmii-rxid. Fixes: bcf3440c6dd78bfe ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f1f0976e7669a8..2213e6ab81512f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3736,7 +3736,7 @@ static int macb_init(struct platform_device *pdev) if (!(bp->caps & MACB_CAPS_USRIO_DISABLED)) { val = 0; - if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_mode_is_rgmii(bp->phy_interface)) val = GEM_BIT(RGMII); else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII && (bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) From 7f5f81406e2b36785f1e25fe5209edd9dd3610d7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:37:25 -0700 Subject: [PATCH 043/131] rhashtable: drop duplicated word in Drop the doubled word "be" in a comment. Signed-off-by: Randy Dunlap Cc: Thomas Graf Cc: Herbert Xu Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 70ebef866cc82a..d3432ee65de768 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -33,7 +33,7 @@ * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marks which has * the least significant bit set but otherwise stores the address of - * the hash bucket. This allows us to be be sure we've found the end + * the hash bucket. This allows us to be sure we've found the end * of the right list. * The value stored in the hash bucket has BIT(0) used as a lock bit. * This bit must be atomically set before any changes are made to From 6d6148bc78d2f002ecc67b801c5f55a0cc5184c8 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Fri, 17 Jul 2020 10:55:09 -0400 Subject: [PATCH 044/131] net: hsr: fix incorrect lsdu size in the tag of HSR frames for small frames For small Ethernet frames with size less than minimum size 66 for HSR vs 60 for regular Ethernet frames, hsr driver currently doesn't pad the frame to make it minimum size. This results in incorrect LSDU size being populated in the HSR tag for these frames. Fix this by padding the frame to the minimum size applicable for HSR. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_forward.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index ed13760463decf..e42fd356f07355 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -127,6 +127,9 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, int lane_id; int lsdu_size; + /* pad to minimum packet size which is 60 + 6 (HSR tag) */ + skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); + if (port->type == HSR_PT_SLAVE_A) lane_id = 0; else From eea9f73e1ff9b99fbb937d41bb07c5bb2ae9ea2f Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Fri, 17 Jul 2020 10:55:10 -0400 Subject: [PATCH 045/131] net: hsr: validate address B before copying to skb Validate MAC address before copying the same to outgoing frame skb destination address. Since a node can have zero mac address for Link B until a valid frame is received over that link, this fix address the issue of a zero MAC address being in the packet. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 03b89190431443..530de24b1fb573 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -325,7 +325,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, if (port->type != node_dst->addr_B_port) return; - ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); + if (is_valid_ether_addr(node_dst->macaddress_B)) + ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); } void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, From 9b8737788af6c76ef93e3161ee2cdc4ddcc034ca Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Fri, 17 Jul 2020 22:01:43 +0300 Subject: [PATCH 046/131] mlxsw: core: Fix wrong SFP EEPROM reading for upper pages 1-3 Fix wrong reading of upper pages for SFP EEPROM. According to "Memory Organization" figure in SFF-8472 spec: When reading upper pages 1, 2 and 3 the offset should be set relative to zero and I2C high address 0x51 [1010001X (A2h)] is to be used. Fixes: a45bfb5a5070 ("mlxsw: core: Extend QSFP EEPROM size for ethtool") Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_env.c | 48 ++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 08215fed193d30..a7d86df7123ff2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -45,7 +45,7 @@ static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, static int mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, u16 offset, u16 size, void *data, - unsigned int *p_read_size) + bool qsfp, unsigned int *p_read_size) { char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; char mcia_pl[MLXSW_REG_MCIA_LEN]; @@ -54,6 +54,10 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, int status; int err; + /* MCIA register accepts buffer size <= 48. Page of size 128 should be + * read by chunks of size 48, 48, 32. Align the size of the last chunk + * to avoid reading after the end of the page. + */ size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE); if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH && @@ -63,18 +67,25 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW; if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) { - page = MLXSW_REG_MCIA_PAGE_GET(offset); - offset -= MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH * page; - /* When reading upper pages 1, 2 and 3 the offset starts at - * 128. Please refer to "QSFP+ Memory Map" figure in SFF-8436 - * specification for graphical depiction. - * MCIA register accepts buffer size <= 48. Page of size 128 - * should be read by chunks of size 48, 48, 32. Align the size - * of the last chunk to avoid reading after the end of the - * page. - */ - if (offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) - size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; + if (qsfp) { + /* When reading upper pages 1, 2 and 3 the offset + * starts at 128. Please refer to "QSFP+ Memory Map" + * figure in SFF-8436 specification for graphical + * depiction. + */ + page = MLXSW_REG_MCIA_PAGE_GET(offset); + offset -= MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH * page; + if (offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) + size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; + } else { + /* When reading upper pages 1, 2 and 3 the offset + * starts at 0 and I2C high address is used. Please refer + * refer to "Memory Organization" figure in SFF-8472 + * specification for graphical depiction. + */ + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH; + offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH; + } } mlxsw_reg_mcia_pack(mcia_pl, module, 0, page, offset, size, i2c_addr); @@ -166,7 +177,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, int err; err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset, - module_info, &read_size); + module_info, false, &read_size); if (err) return err; @@ -197,7 +208,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, /* Verify if transceiver provides diagnostic monitoring page */ err = mlxsw_env_query_module_eeprom(mlxsw_core, module, SFP_DIAGMON, 1, &diag_mon, - &read_size); + false, &read_size); if (err) return err; @@ -225,17 +236,22 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev, int offset = ee->offset; unsigned int read_size; int i = 0; + bool qsfp; int err; if (!ee->len) return -EINVAL; memset(data, 0, ee->len); + /* Validate module identifier value. */ + err = mlxsw_env_validate_cable_ident(mlxsw_core, module, &qsfp); + if (err) + return err; while (i < ee->len) { err = mlxsw_env_query_module_eeprom(mlxsw_core, module, offset, ee->len - i, data + i, - &read_size); + qsfp, &read_size); if (err) { netdev_err(netdev, "Eeprom query failed\n"); return err; From a35fffbf98189ba8359f19073286b2ea816255c5 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:09 +0200 Subject: [PATCH 047/131] net/smc: handle unexpected response types for confirm link A delete link could arrive during confirm link processing. Handle this situation directly in smc_llc_srv_conf_link() rather than using the logic in smc_llc_wait() to avoid the unexpected message handling there. Reviewed-by: Ursula Braun Fixes: 1551c95b6124 ("net/smc: final part of add link processing as SMC server") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index c1a038689c63cb..58f4da2e0cc7d3 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1051,12 +1051,14 @@ static int smc_llc_srv_conf_link(struct smc_link *link, if (rc) return -ENOLINK; /* receive CONFIRM LINK response over the RoCE fabric */ - qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, - SMC_LLC_CONFIRM_LINK); - if (!qentry) { + qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0); + if (!qentry || + qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { /* send DELETE LINK */ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, false, SMC_LLC_DEL_LOST_PATH); + if (qentry) + smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return -ENOLINK; } smc_llc_save_peer_uid(qentry); From 68fd8942038f30dbb64a594dc15d9948289de42a Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:10 +0200 Subject: [PATCH 048/131] net/smc: clear link during SMC client link down processing In a link-down condition we notify the SMC server and expect that the server will finally trigger the link clear processing on the client side. This could fail when anything along this notification path goes wrong. Clear the link as part of SMC client link-down processing to prevent dangling links. Reviewed-by: Ursula Braun Fixes: 541afa10c126 ("net/smc: add smcr_port_err() and smcr_link_down() processing") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f69d205b3e11e5..e286b3c8c9629f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1204,10 +1204,12 @@ static void smcr_link_down(struct smc_link *lnk) SMC_LLC_WAIT_TIME); mutex_lock(&lgr->llc_conf_mutex); } - if (!list_empty(&lgr->list)) + if (!list_empty(&lgr->list)) { smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true, SMC_LLC_DEL_LOST_PATH); + smcr_link_clear(lnk, true); + } wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ } } From 7df8bcb56053173e5e5c0e566391fa601e3e4778 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:11 +0200 Subject: [PATCH 049/131] net/smc: fix link lookup for new rdma connections For new rdma connections the SMC server assigns the link and sends the link data in the clc accept message. To match the correct link use not only the qp_num but also the gid and the mac of the links. If there are equal qp_nums for different links the wrong link would be chosen. Reviewed-by: Ursula Braun Fixes: 0fb0b02bd6fd ("net/smc: adapt SMC client code to use the LLC flow") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9033215438384b..f80591567a3dbb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -632,7 +632,9 @@ static int smc_connect_rdma(struct smc_sock *smc, for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *l = &smc->conn.lgr->lnk[i]; - if (l->peer_qpn == ntoh24(aclc->qpn)) { + if (l->peer_qpn == ntoh24(aclc->qpn) && + !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) && + !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) { link = l; break; } From 63673597cca93ef6fa12414933da01d5806547af Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:12 +0200 Subject: [PATCH 050/131] net/smc: protect smc ib device initialization Before an smc ib device is used the first time for an smc link it is lazily initialized. When there are 2 active link groups and a new ib device is brought online then it might happen that 2 link creations run in parallel and enter smc_ib_setup_per_ibdev(). Both allocate new send and receive completion queues on the device, but only one set of them keeps assigned and the other leaks. Fix that by protecting the setup and cleanup code using a mutex. Reviewed-by: Ursula Braun Fixes: f3c1deddb21c ("net/smc: separate function for link initialization") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 16 +++++++++++++--- net/smc/smc_ib.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 7637fdebbb78f7..1c314dbdc7faa4 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -506,6 +506,10 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) int cqe_size_order, smc_order; long rc; + mutex_lock(&smcibdev->mutex); + rc = 0; + if (smcibdev->initialized) + goto out; /* the calculated number of cq entries fits to mlx5 cq allocation */ cqe_size_order = cache_line_size() == 128 ? 7 : 6; smc_order = MAX_ORDER - cqe_size_order - 1; @@ -517,7 +521,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send); if (IS_ERR(smcibdev->roce_cq_send)) { smcibdev->roce_cq_send = NULL; - return rc; + goto out; } smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev, smc_wr_rx_cq_handler, NULL, @@ -529,21 +533,26 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) } smc_wr_add_dev(smcibdev); smcibdev->initialized = 1; - return rc; + goto out; err: ib_destroy_cq(smcibdev->roce_cq_send); +out: + mutex_unlock(&smcibdev->mutex); return rc; } static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) { + mutex_lock(&smcibdev->mutex); if (!smcibdev->initialized) - return; + goto out; smcibdev->initialized = 0; ib_destroy_cq(smcibdev->roce_cq_recv); ib_destroy_cq(smcibdev->roce_cq_send); smc_wr_remove_dev(smcibdev); +out: + mutex_unlock(&smcibdev->mutex); } static struct ib_client smc_ib_client; @@ -566,6 +575,7 @@ static int smc_ib_add_dev(struct ib_device *ibdev) INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); atomic_set(&smcibdev->lnk_cnt, 0); init_waitqueue_head(&smcibdev->lnks_deleted); + mutex_init(&smcibdev->mutex); mutex_lock(&smc_ib_devices.mutex); list_add_tail(&smcibdev->list, &smc_ib_devices.list); mutex_unlock(&smc_ib_devices.mutex); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index ae6776e1e7264a..2ce481187dd0be 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -52,6 +52,7 @@ struct smc_ib_device { /* ib-device infos for smc */ DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS); atomic_t lnk_cnt; /* number of links on ibdev */ wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/ + struct mutex mutex; /* protect dev setup+cleanup */ }; struct smc_buf_desc; From 2ff0867851a21ea1ccb0c275ae1df2fe7787e1b9 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:13 +0200 Subject: [PATCH 051/131] net/smc: drop out-of-flow llc response messages To be save from unexpected or late llc response messages check if the arrived message fits to the current flow type and drop out-of-flow messages. And drop it when there is already a response assigned to the flow. Reviewed-by: Ursula Braun Fixes: ef79d439cd12 ("net/smc: process llc responses in tasklet context") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 58f4da2e0cc7d3..78704f03e72a09 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1587,6 +1587,8 @@ static void smc_llc_event_work(struct work_struct *work) static void smc_llc_rx_response(struct smc_link *link, struct smc_llc_qentry *qentry) { + enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; + struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; u8 llc_type = qentry->msg.raw.hdr.common.type; switch (llc_type) { @@ -1595,15 +1597,20 @@ static void smc_llc_rx_response(struct smc_link *link, complete(&link->llc_testlink_resp); break; case SMC_LLC_ADD_LINK: - case SMC_LLC_DELETE_LINK: - case SMC_LLC_CONFIRM_LINK: case SMC_LLC_ADD_LINK_CONT: + case SMC_LLC_CONFIRM_LINK: + if (flowtype != SMC_LLC_FLOW_ADD_LINK || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; + case SMC_LLC_DELETE_LINK: + if (flowtype != SMC_LLC_FLOW_DEL_LINK || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; case SMC_LLC_CONFIRM_RKEY: case SMC_LLC_DELETE_RKEY: - /* assign responses to the local flow, we requested them */ - smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); - wake_up(&link->lgr->llc_msg_waiter); - return; + if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ break; @@ -1612,6 +1619,11 @@ static void smc_llc_rx_response(struct smc_link *link, break; } kfree(qentry); + return; +assign: + /* assign responses to the local flow, we requested them */ + smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); + wake_up(&link->lgr->llc_msg_waiter); } static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) From c48254fa48e5bad589fbbf1578dae960cedcafcf Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:14 +0200 Subject: [PATCH 052/131] net/smc: move add link processing for new device into llc layer When a new ib device is up smc will send an add link invitation to the peer if needed. This is currently done with rudimentary flow control. Under high workload these add link invitations can disturb other llc flows because they arrive unexpected. Fix this by integrating the invitations into the normal llc event flow and handle them as a flow. While at it, check for already assigned requests in the flow before the new add link request is assigned. Reviewed-by: Ursula Braun Fixes: 1f90a05d9ff9 ("net/smc: add smcr_port_add() and smcr_link_up() processing") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 80 ++++------------------------------------------ net/smc/smc_llc.c | 56 ++++++++++++++++++++++++++++---- net/smc/smc_llc.h | 2 +- 3 files changed, 58 insertions(+), 80 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e286b3c8c9629f..2e965de7412d69 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -45,18 +45,10 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */ static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); -struct smc_ib_up_work { - struct work_struct work; - struct smc_link_group *lgr; - struct smc_ib_device *smcibdev; - u8 ibport; -}; - static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc); static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); -static void smc_link_up_work(struct work_struct *work); static void smc_link_down_work(struct work_struct *work); /* return head of link group list and its lock for a given link group */ @@ -1106,67 +1098,23 @@ static void smc_conn_abort_work(struct work_struct *work) sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ } -/* link is up - establish alternate link if applicable */ -static void smcr_link_up(struct smc_link_group *lgr, - struct smc_ib_device *smcibdev, u8 ibport) -{ - struct smc_link *link = NULL; - - if (list_empty(&lgr->list) || - lgr->type == SMC_LGR_SYMMETRIC || - lgr->type == SMC_LGR_ASYMMETRIC_PEER) - return; - - if (lgr->role == SMC_SERV) { - /* trigger local add link processing */ - link = smc_llc_usable_link(lgr); - if (!link) - return; - smc_llc_srv_add_link_local(link); - } else { - /* invite server to start add link processing */ - u8 gid[SMC_GID_SIZE]; - - if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid, - NULL)) - return; - if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { - /* some other llc task is ongoing */ - wait_event_timeout(lgr->llc_flow_waiter, - (list_empty(&lgr->list) || - lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), - SMC_LLC_WAIT_TIME); - } - /* lgr or device no longer active? */ - if (!list_empty(&lgr->list) && - smc_ib_port_active(smcibdev, ibport)) - link = smc_llc_usable_link(lgr); - if (link) - smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], - gid, NULL, SMC_LLC_REQ); - wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ - } -} - void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) { - struct smc_ib_up_work *ib_work; struct smc_link_group *lgr, *n; list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { + struct smc_link *link; + if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, SMC_MAX_PNETID_LEN) || lgr->type == SMC_LGR_SYMMETRIC || lgr->type == SMC_LGR_ASYMMETRIC_PEER) continue; - ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL); - if (!ib_work) - continue; - INIT_WORK(&ib_work->work, smc_link_up_work); - ib_work->lgr = lgr; - ib_work->smcibdev = smcibdev; - ib_work->ibport = ibport; - schedule_work(&ib_work->work); + + /* trigger local add link processing */ + link = smc_llc_usable_link(lgr); + if (link) + smc_llc_add_link_local(link); } } @@ -1249,20 +1197,6 @@ void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) } } -static void smc_link_up_work(struct work_struct *work) -{ - struct smc_ib_up_work *ib_work = container_of(work, - struct smc_ib_up_work, - work); - struct smc_link_group *lgr = ib_work->lgr; - - if (list_empty(&lgr->list)) - goto out; - smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport); -out: - kfree(ib_work); -} - static void smc_link_down_work(struct work_struct *work) { struct smc_link *link = container_of(work, struct smc_link, diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 78704f03e72a09..30da040ab5b6cf 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -895,6 +895,36 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) return rc; } +/* as an SMC client, invite server to start the add_link processing */ +static void smc_llc_cli_add_link_invite(struct smc_link *link, + struct smc_llc_qentry *qentry) +{ + struct smc_link_group *lgr = smc_get_lgr(link); + struct smc_init_info ini; + + if (lgr->type == SMC_LGR_SYMMETRIC || + lgr->type == SMC_LGR_ASYMMETRIC_PEER) + goto out; + + ini.vlan_id = lgr->vlan_id; + smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); + if (!ini.ib_dev) + goto out; + + smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1], + ini.ib_gid, NULL, SMC_LLC_REQ); +out: + kfree(qentry); +} + +static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) +{ + if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK && + !llc->add_link.qp_mtu && !llc->add_link.link_num) + return true; + return false; +} + static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) { struct smc_llc_qentry *qentry; @@ -902,7 +932,10 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); mutex_lock(&lgr->llc_conf_mutex); - smc_llc_cli_add_link(qentry->link, qentry); + if (smc_llc_is_local_add_link(&qentry->msg)) + smc_llc_cli_add_link_invite(qentry->link, qentry); + else + smc_llc_cli_add_link(qentry->link, qentry); mutex_unlock(&lgr->llc_conf_mutex); } @@ -1160,14 +1193,14 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) mutex_unlock(&lgr->llc_conf_mutex); } -/* enqueue a local add_link req to trigger a new add_link flow, only as SERV */ -void smc_llc_srv_add_link_local(struct smc_link *link) +/* enqueue a local add_link req to trigger a new add_link flow */ +void smc_llc_add_link_local(struct smc_link *link) { struct smc_llc_msg_add_link add_llc = {0}; add_llc.hd.length = sizeof(add_llc); add_llc.hd.common.type = SMC_LLC_ADD_LINK; - /* no dev and port needed, we as server ignore client data anyway */ + /* no dev and port needed */ smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc); } @@ -1347,7 +1380,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) { /* trigger setup of asymm alt link */ - smc_llc_srv_add_link_local(lnk); + smc_llc_add_link_local(lnk); } out: mutex_unlock(&lgr->llc_conf_mutex); @@ -1476,7 +1509,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (list_empty(&lgr->list)) goto out; /* lgr is terminating */ if (lgr->role == SMC_CLNT) { - if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) { + if (smc_llc_is_local_add_link(llc)) { + if (lgr->llc_flow_lcl.type == + SMC_LLC_FLOW_ADD_LINK) + break; /* add_link in progress */ + if (smc_llc_flow_start(&lgr->llc_flow_lcl, + qentry)) { + schedule_work(&lgr->llc_add_link_work); + } + return; + } + if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && + !lgr->llc_flow_lcl.qentry) { /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index a5d2fe3eea61ff..cc00a2ec4e92e8 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -103,7 +103,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); int smc_llc_srv_add_link(struct smc_link *link); -void smc_llc_srv_add_link_local(struct smc_link *link); +void smc_llc_add_link_local(struct smc_link *link); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ From b9979c2e837926c87358024a95c67988477909b1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:15 +0200 Subject: [PATCH 053/131] net/smc: fix handling of delete link requests As smc client the delete link requests are assigned to the flow when _any_ flow is active. This may break other flows that do not expect delete link requests during their handling. Fix that by assigning the request only when an add link flow is active. With that fix the code for smc client and smc server is the same, so remove the separate handling. Reviewed-by: Ursula Braun Fixes: 9ec6bf19ec8b ("net/smc: llc_del_link_work and use the LLC flow for delete link") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 30da040ab5b6cf..fa8cd57a9b32cd 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1544,28 +1544,13 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) } break; case SMC_LLC_DELETE_LINK: - if (lgr->role == SMC_CLNT) { - /* server requests to delete this link, send response */ - if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { - /* DEL LINK REQ during ADD LINK SEQ */ - smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, - qentry); - wake_up(&lgr->llc_msg_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { - schedule_work(&lgr->llc_del_link_work); - } - } else { - if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && - !lgr->llc_flow_lcl.qentry) { - /* DEL LINK REQ during ADD LINK SEQ */ - smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, - qentry); - wake_up(&lgr->llc_msg_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { - schedule_work(&lgr->llc_del_link_work); - } + if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && + !lgr->llc_flow_lcl.qentry) { + /* DEL LINK REQ during ADD LINK SEQ */ + smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); + wake_up(&lgr->llc_msg_waiter); + } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { + schedule_work(&lgr->llc_del_link_work); } return; case SMC_LLC_CONFIRM_RKEY: From 741a49a4dc5fd7e61b37b259dde915083c2c5327 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:16 +0200 Subject: [PATCH 054/131] net/smc: do not call dma sync for unmapped memory The dma related ...sync_sg... functions check the link state before the dma function is actually called. But the check in smc_link_usable() allows links in ACTIVATING state which are not yet mapped to dma memory. Under high load it may happen that the sync_sg functions are called for such a link which results in an debug output like DMA-API: mlx5_core 0002:00:00.0: device driver tries to sync DMA memory it has not allocated [device address=0x0000000103370000] [size=65536 bytes] To fix that introduce a helper to check for the link state ACTIVE and use it where appropriate. And move the link state update to ACTIVATING to the end of smcr_link_init() when most initial setup is done. Reviewed-by: Ursula Braun Fixes: d854fcbfaeda ("net/smc: add new link state and related helpers") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 +- net/smc/smc_core.c | 15 +++++++-------- net/smc/smc_core.h | 5 +++++ net/smc/smc_llc.c | 10 +++++----- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f80591567a3dbb..d091509b5982f2 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -352,7 +352,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, */ mutex_lock(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + if (!smc_link_active(&lgr->lnk[i])) continue; rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc); if (rc) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2e965de7412d69..42ba227f3e97b9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -318,7 +318,6 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, get_device(&ini->ib_dev->ibdev->dev); atomic_inc(&ini->ib_dev->lnk_cnt); - lnk->state = SMC_LNK_ACTIVATING; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; lnk->link_idx = link_idx; @@ -354,6 +353,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, rc = smc_wr_create_link(lnk); if (rc) goto destroy_qp; + lnk->state = SMC_LNK_ACTIVATING; return 0; destroy_qp: @@ -542,8 +542,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, smc_wr_wakeup_tx_wait(from_lnk); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE || - i == from_lnk->link_idx) + if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx) continue; if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && from_lnk->ibport == lgr->lnk[i].ibport) { @@ -1269,7 +1268,7 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, return false; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + if (!smc_link_active(&lgr->lnk[i])) continue; if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && @@ -1717,14 +1716,14 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -1736,7 +1735,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&conn->lgr->lnk[i])) + if (!smc_link_active(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); @@ -1750,7 +1749,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&conn->lgr->lnk[i])) + if (!smc_link_active(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c3ff512fd8911f..1c4d5439d0ff28 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -349,6 +349,11 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_active(struct smc_link *lnk) +{ + return lnk->state == SMC_LNK_ACTIVE; +} + struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index fa8cd57a9b32cd..df5b0a6ea8488f 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -428,7 +428,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, rtok_ix = 1; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { link = &send_link->lgr->lnk[i]; - if (link->state == SMC_LNK_ACTIVE && link != send_link) { + if (smc_link_active(link) && link != send_link) { rkeyllc->rtoken[rtok_ix].link_id = link->link_id; rkeyllc->rtoken[rtok_ix].rmb_key = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); @@ -944,7 +944,7 @@ static int smc_llc_active_link_count(struct smc_link_group *lgr) int i, link_count = 0; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_active(&lgr->lnk[i])) continue; link_count++; } @@ -1622,7 +1622,7 @@ static void smc_llc_rx_response(struct smc_link *link, switch (llc_type) { case SMC_LLC_TEST_LINK: - if (link->state == SMC_LNK_ACTIVE) + if (smc_link_active(link)) complete(&link->llc_testlink_resp); break; case SMC_LLC_ADD_LINK: @@ -1706,7 +1706,7 @@ static void smc_llc_testlink_work(struct work_struct *work) u8 user_data[16] = { 0 }; int rc; - if (link->state != SMC_LNK_ACTIVE) + if (!smc_link_active(link)) return; /* don't reschedule worker */ expire_time = link->wr_rx_tstamp + link->llc_testlink_time; if (time_is_after_jiffies(expire_time)) { @@ -1718,7 +1718,7 @@ static void smc_llc_testlink_work(struct work_struct *work) /* receive TEST LINK response over RoCE fabric */ rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); - if (link->state != SMC_LNK_ACTIVE) + if (!smc_link_active(link)) return; /* link state changed */ if (rc <= 0) { smcr_link_down_cond_sched(link); From fd7f3a746582e8b17c48d4d8087d38c91f59ba67 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:17 +0200 Subject: [PATCH 055/131] net/smc: remove freed buffer from list Two buffers are allocated for each SMC connection. Each buffer is added to a buffer list after creation. When the second buffer allocation fails, the first buffer is freed but not deleted from the list. This might result in crashes when another connection picks up the freed buffer later and starts to work with it. Reviewed-by: Ursula Braun Fixes: 6511aad3f039 ("net/smc: change smc_buf_free function parameters") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 42ba227f3e97b9..ca3dc6af73af0a 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1772,8 +1772,12 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) return rc; /* create rmb */ rc = __smc_buf_create(smc, is_smcd, true); - if (rc) + if (rc) { + mutex_lock(&smc->conn.lgr->sndbufs_lock); + list_del(&smc->conn.sndbuf_desc->list); + mutex_unlock(&smc->conn.lgr->sndbufs_lock); smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); + } return rc; } From 1ad24058335427d046b2e5666bcd15a62ad9e242 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sat, 18 Jul 2020 15:06:18 +0200 Subject: [PATCH 056/131] net/smc: fix restoring of fallback changes When a listen socket is closed then all non-accepted sockets in its accept queue are to be released. Inside __smc_release() the helper smc_restore_fallback_changes() restores the changes done to the socket without to check if the clcsocket has a file set. This can result in a crash. Fix this by checking the file pointer first. Reviewed-by: Ursula Braun Fixes: f536dffc0b79 ("net/smc: fix closing of fallback SMC sockets") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d091509b5982f2..1163d51196da99 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -126,8 +126,10 @@ EXPORT_SYMBOL_GPL(smc_proto6); static void smc_restore_fallback_changes(struct smc_sock *smc) { - smc->clcsock->file->private_data = smc->sk.sk_socket; - smc->clcsock->file = NULL; + if (smc->clcsock->file) { /* non-accepted sockets have no file yet */ + smc->clcsock->file->private_data = smc->sk.sk_socket; + smc->clcsock->file = NULL; + } } static int __smc_release(struct smc_sock *smc) From fad58190c0ffd72c394722928cd3e919b6e18357 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 19 Jul 2020 12:00:35 +0100 Subject: [PATCH 057/131] net: dsa: mv88e6xxx: fix in-band AN link establishment If in-band negotiation or fixed-link modes are specified for a DSA port, the DSA code will force the link down during initialisation. For fixed-link mode, this is fine, as phylink will manage the link state. However, for in-band mode, phylink expects the PCS to detect link, which will not happen if the link is forced down. There is a related issue that in in-band mode, the link could come up while we are making configuration changes, so we should force the link down prior to reconfiguring the interface mode. This patch addresses both issues. Fixes: 3be98b2d5fbc ("net: dsa: Down cpu/dsa ports phylink will control") Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 22 +++++++++++++++++++--- drivers/net/dsa/mv88e6xxx/chip.h | 1 + 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 7627ea61e0ea82..fee16c947c2e8b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -664,8 +664,11 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, const struct phylink_link_state *state) { struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_port *p; int err; + p = &chip->ports[port]; + /* FIXME: is this the correct test? If we're in fixed mode on an * internal port, why should we process this any different from * PHY mode? On the other hand, the port may be automedia between @@ -675,10 +678,14 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, return; mv88e6xxx_reg_lock(chip); - /* FIXME: should we force the link down here - but if we do, how - * do we restore the link force/unforce state? The driver layering - * gets in the way. + /* In inband mode, the link may come up at any time while the link + * is not forced down. Force the link down while we reconfigure the + * interface mode. */ + if (mode == MLO_AN_INBAND && p->interface != state->interface && + chip->info->ops->port_set_link) + chip->info->ops->port_set_link(chip, port, LINK_FORCED_DOWN); + err = mv88e6xxx_port_config_interface(chip, port, state->interface); if (err && err != -EOPNOTSUPP) goto err_unlock; @@ -691,6 +698,15 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, if (err > 0) err = 0; + /* Undo the forced down state above after completing configuration + * irrespective of its state on entry, which allows the link to come up. + */ + if (mode == MLO_AN_INBAND && p->interface != state->interface && + chip->info->ops->port_set_link) + chip->info->ops->port_set_link(chip, port, LINK_UNFORCED); + + p->interface = state->interface; + err_unlock: mv88e6xxx_reg_unlock(chip); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index e5430cf2ad711c..6476524e8239d8 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -232,6 +232,7 @@ struct mv88e6xxx_port { u64 atu_full_violation; u64 vtu_member_violation; u64 vtu_miss_violation; + phy_interface_t interface; u8 cmode; bool mirror_ingress; bool mirror_egress; From 7c6719a1aaca51ffd7cdf3905e70aa8313f6ef26 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 19 Jul 2020 12:00:40 +0100 Subject: [PATCH 058/131] arm64: dts: clearfog-gt-8k: fix switch link configuration The commit below caused a regression for clearfog-gt-8k, where the link between the switch and the host does not come up. Investigation revealed two issues: - MV88E6xxx DSA no longer allows an in-band link to come up as the link is programmed to be forced down. Commit "net: dsa: mv88e6xxx: fix in-band AN link establishment" addresses this. - The dts configured dissimilar link modes at each end of the host to switch link; the host was configured using a fixed link (so has no in-band status) and the switch was configured to expect in-band status. With both issues fixed, the regression is resolved. Fixes: 34b5e6a33c1a ("net: dsa: mv88e6xxx: Configure MAC when using fixed link") Reported-by: Martin Rowe Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts index c8243da710414d..eb01cc96ba7a3a 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts @@ -454,10 +454,7 @@ status = "okay"; phy-mode = "2500base-x"; phys = <&cp1_comphy5 2>; - fixed-link { - speed = <2500>; - full-duplex; - }; + managed = "in-band-status"; }; &cp1_spi1 { From 544f287b84959203367cd29e16e772717612fab4 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 19 Jul 2020 12:11:24 +0000 Subject: [PATCH 059/131] bonding: check error value of register_netdevice() immediately If register_netdevice() is failed, net_device should not be used because variables are uninitialized or freed. So, the routine should be stopped immediately. But, bond_create() doesn't check return value of register_netdevice() immediately. That will result in a panic because of using uninitialized or freed memory. Test commands: modprobe netdev-notifier-error-inject echo -22 > /sys/kernel/debug/notifier-error-inject/netdev/\ actions/NETDEV_REGISTER/error modprobe bonding max_bonds=3 Splat looks like: [ 375.028492][ T193] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b6b: 0000 [#1] SMP DEBUG_PAGEALLOC PTI [ 375.033207][ T193] CPU: 2 PID: 193 Comm: kworker/2:2 Not tainted 5.8.0-rc4+ #645 [ 375.036068][ T193] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 375.039673][ T193] Workqueue: events linkwatch_event [ 375.041557][ T193] RIP: 0010:dev_activate+0x4a/0x340 [ 375.043381][ T193] Code: 40 a8 04 0f 85 db 00 00 00 8b 83 08 04 00 00 85 c0 0f 84 0d 01 00 00 31 d2 89 d0 48 8d 04 40 48 c1 e0 07 48 03 83 00 04 00 00 <48> 8b 48 10 f6 41 10 01 75 08 f0 80 a1 a0 01 00 00 fd 48 89 48 08 [ 375.050267][ T193] RSP: 0018:ffff9f8facfcfdd8 EFLAGS: 00010202 [ 375.052410][ T193] RAX: 6b6b6b6b6b6b6b6b RBX: ffff9f8fae6ea000 RCX: 0000000000000006 [ 375.055178][ T193] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9f8fae6ea000 [ 375.057762][ T193] RBP: ffff9f8fae6ea000 R08: 0000000000000000 R09: 0000000000000000 [ 375.059810][ T193] R10: 0000000000000001 R11: 0000000000000000 R12: ffff9f8facfcfe08 [ 375.061892][ T193] R13: ffffffff883587e0 R14: 0000000000000000 R15: ffff9f8fae6ea580 [ 375.063931][ T193] FS: 0000000000000000(0000) GS:ffff9f8fbae00000(0000) knlGS:0000000000000000 [ 375.066239][ T193] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 375.067841][ T193] CR2: 00007f2f542167a0 CR3: 000000012cee6002 CR4: 00000000003606e0 [ 375.069657][ T193] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 375.071471][ T193] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 375.073269][ T193] Call Trace: [ 375.074005][ T193] linkwatch_do_dev+0x4d/0x50 [ 375.075052][ T193] __linkwatch_run_queue+0x10b/0x200 [ 375.076244][ T193] linkwatch_event+0x21/0x30 [ 375.077274][ T193] process_one_work+0x252/0x600 [ 375.078379][ T193] ? process_one_work+0x600/0x600 [ 375.079518][ T193] worker_thread+0x3c/0x380 [ 375.080534][ T193] ? process_one_work+0x600/0x600 [ 375.081668][ T193] kthread+0x139/0x150 [ 375.082567][ T193] ? kthread_park+0x90/0x90 [ 375.083567][ T193] ret_from_fork+0x22/0x30 Fixes: e826eafa65c6 ("bonding: Call netif_carrier_off after register_netdevice") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 004919aea5fbf8..f88cb097b022ab 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5053,15 +5053,19 @@ int bond_create(struct net *net, const char *name) bond_dev->rtnl_link_ops = &bond_link_ops; res = register_netdevice(bond_dev); + if (res < 0) { + free_netdev(bond_dev); + rtnl_unlock(); + + return res; + } netif_carrier_off(bond_dev); bond_work_init_all(bond); rtnl_unlock(); - if (res < 0) - free_netdev(bond_dev); - return res; + return 0; } static int __net_init bond_net_init(struct net *net) From 19dc36548be2027cb5a491511bc152493c1244bb Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Sun, 19 Jul 2020 13:31:42 +0200 Subject: [PATCH 060/131] net: ieee802154: adf7242: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Link: https://lore.kernel.org/r/20200719113142.58304-1-grandmaster@al2klimov.de Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 8dbccec6ac8669..c11f32f644db3c 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -4,7 +4,7 @@ * * Copyright 2009-2017 Analog Devices Inc. * - * http://www.analog.com/ADF7242 + * https://www.analog.com/ADF7242 */ #include From 92f53e2fda8bb9a559ad61d57bfb397ce67ed0ab Mon Sep 17 00:00:00 2001 From: Mark O'Donovan Date: Sat, 11 Jul 2020 05:33:24 +0100 Subject: [PATCH 061/131] ath9k: Fix regression with Atheros 9271 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fix allows ath9k_htc modules to connect to WLAN once again. Fixes: 2bbcaaee1fcb ("ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb") Link: https://bugzilla.kernel.org/show_bug.cgi?id=208251 Signed-off-by: Mark O'Donovan Reported-by: Roman Mamedov Tested-by: Viktor Jägersküpper Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200711043324.8079-1-shiftee@posteo.net --- drivers/net/wireless/ath/ath9k/hif_usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 4ed21dad6a8e16..3f563e02d17daa 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -733,11 +733,13 @@ static void ath9k_hif_usb_reg_in_cb(struct urb *urb) return; } + rx_buf->skb = nskb; + usb_fill_int_urb(urb, hif_dev->udev, usb_rcvintpipe(hif_dev->udev, USB_REG_IN_PIPE), nskb->data, MAX_REG_IN_BUF_SIZE, - ath9k_hif_usb_reg_in_cb, nskb, 1); + ath9k_hif_usb_reg_in_cb, rx_buf, 1); } resubmit: From b5ba46b81c2fef00bcf110777fb6d51befa4a23e Mon Sep 17 00:00:00 2001 From: Alessio Bonfiglio Date: Tue, 14 Jul 2020 11:19:11 +0200 Subject: [PATCH 062/131] iwlwifi: Make some Killer Wireless-AC 1550 cards work again Fix the regression introduced by commit c8685937d07f ("iwlwifi: move pu devices to new table") by adding the ids and the configurations of two missing Killer 1550 cards in order to configure and let them work correctly again (following the new table convention). Resolve bug 208141 ("Wireless ac 9560 not working kernel 5.7.2", https://bugzilla.kernel.org/show_bug.cgi?id=208141). Fixes: c8685937d07f ("iwlwifi: move pu devices to new table") Signed-off-by: Alessio Bonfiglio Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200714091911.4442-1-alessio.bonfiglio@mail.polimi.it --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 65d65c6baf4c34..e02bafb8921f6a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -582,6 +582,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x30DC, 0x1552, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_name), IWL_DEV_INFO(0x31DC, 0x1551, iwl9560_2ac_cfg_soc, iwl9560_killer_1550s_name), IWL_DEV_INFO(0x31DC, 0x1552, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_name), + IWL_DEV_INFO(0xA370, 0x1551, iwl9560_2ac_cfg_soc, iwl9560_killer_1550s_name), + IWL_DEV_INFO(0xA370, 0x1552, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_name), IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name), From 1cfd3426ef989b83fa6176490a38777057e57f6c Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 14 Jul 2020 22:58:02 +0200 Subject: [PATCH 063/131] ath10k: Fix NULL pointer dereference in AHB device probe This fixes a NULL pointer dereference in the probe path for AHB devices. There attr parameter in the ath10k_ce_alloc_pipe() function is not initialized, but accessed. This function is called by ath10k_pci_setup_resource() which is called by ath10k_ahb_probe(). The struct ath10k_pci is also used for AHB devices and not only for PCI devices. The initialization of the new members of struct ath10k_pci is moved to ath10k_pci_setup_resource() which is used by the PCI and the AHB code. This also fixes a use after free bug in ath10k_pci_remove() when ar_pci is accessed after ath10k_core_destroy() was called, which calls ieee80211_free_hw() and frees this memory. This fixes the following bug seen with backports-5.8-rc2 on OpenWrt on a IPQ4019 device: [ 11.117462] 8<--- cut here --- [ 11.117494] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 11.119510] pgd = f377fd58 [ 11.127657] [00000000] *pgd=8e9a0835, *pte=00000000, *ppte=00000000 [ 11.130206] Internal error: Oops: 17 [#1] SMP ARM [ 11.136339] Modules linked in: ath10k_pci(+) ath10k_core ath xt_state xt_nat xt_conntrack xt_REDIRECT xt_MASQUERADE xt_FLOWOFFLOAD pppox ppp_generic nf_nat nf_flow_table_hw nf_flow_table nf_conntrack_rtcache nf_conntrack mac80211 ipt_REJECT cfg80211 xt_time xt_tcpudp xt_multiport xt_mark xt_mac xt_limit xt_comment xt_TCPMSS xt_LOG slhc nf_reject_ipv4 nf_log_ipv4 nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_filter ip_tables crc_ccitt compat nf_log_ipv6 nf_log_common ip6table_mangle ip6table_filter ip6_tables ip6t_REJECT x_tables nf_reject_ipv6 leds_gpio xhci_plat_hcd xhci_pci xhci_hcd dwc3 dwc3_qcom gpio_button_hotplug [ 11.174355] CPU: 2 PID: 257 Comm: kmodloader Not tainted 5.4.51 #0 [ 11.196585] Hardware name: Generic DT based system [ 11.202746] PC is at ath10k_ce_alloc_pipe+0x58/0x180 [ath10k_core] [ 11.207459] LR is at ath10k_pci_alloc_pipes+0x94/0xc8 [ath10k_pci] [ 11.213600] pc : [] lr : [] psr: 80000013 [ 11.219760] sp : cea0dc90 ip : cf4001f0 fp : 00000001 [ 11.225923] r10: 00000000 r9 : 00000018 r8 : ce4963b4 [ 11.231133] r7 : 00000000 r6 : ce491ea0 r5 : 00000000 r4 : ce4963b4 [ 11.236342] r3 : 0004a000 r2 : 0004a000 r1 : bf2d0d70 r0 : 00000006 [ 11.242942] Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 11.249452] Control: 10c5387d Table: 8e9c006a DAC: 00000051 [ 11.256656] Process kmodloader (pid: 257, stack limit = 0xaba286ca) [ 11.262386] Stack: (0xcea0dc90 to 0xcea0e000) [ 11.268462] dc80: 00000000 ce49629c ce491ea0 ce4963bc [ 11.272984] dca0: ce495ea0 bf2fbf98 00000002 ce4963a8 ce495ea0 00000000 ce491ea0 cf95d800 [ 11.281142] dcc0: cf95d810 cf95d810 00000001 bf2fc854 00000000 cf95d800 bf300748 ce495ea0 [ 11.289304] dce0: ce491ea0 d1300000 cf95d800 bf2fde8c 00000000 00000001 ce49cea0 00000000 [ 11.297462] dd00: 00000000 00000000 bf3010a0 cf95d810 bf3010a0 c0b61580 00000000 00000000 [ 11.305624] dd20: bf3010a0 0000000b c0b04e48 c06110c8 c0b61588 cf95d810 c0b61580 c060f740 [ 11.313781] dd40: cf95d810 00000000 bf3010a0 00000000 00000000 ce49d2a4 bf301100 c060fc90 [ 11.321943] dd60: 00000000 bf3010a0 cf95d810 c060fcf0 cf95d810 bf3010a0 c060fc98 c060dca4 [ 11.330101] dd80: cf809d58 cf952cb4 bf3010a0 ce967900 c0b1f2c8 c060ec28 bf3007b8 bf301038 [ 11.338263] dda0: bf3010a0 bf3010a0 c0b2d4d4 ffffe000 bf304000 c0610278 c0b04e48 c0b2d4d4 [ 11.346422] ddc0: ffffe000 bf2fe2b4 c0b04e48 bf30403c c0b04e48 c0302764 8040003f 00000001 [ 11.354582] dde0: 38e38e39 ce513580 c0b2cb50 cf801e00 cffbc6ac ce513600 cf801e00 cffbc6ac [ 11.362740] de00: 8040003e ce49d280 00000001 c0428d54 00000001 cf801e00 cffbc6ac ce513580 [ 11.370900] de20: ce49d280 0e391998 bf301100 ce49d340 d12d2000 ce49d280 00000001 c0398c2c [ 11.379061] de40: 00000001 cea0df34 cea0df34 00000001 d12d2000 c039ae48 bf30110c 00007fff [ 11.387221] de60: bf301100 c0398044 cf804028 bf301148 c0397674 bf30126c c08ee5c0 c08ee70c [ 11.395380] de80: bf30110c c0b04e48 c08ee518 00000000 c08ee570 c0b04e48 ce513600 fffff000 [ 11.403540] dea0: 00000001 ce513580 0000000d 0000000d 00000000 00000000 00000000 00000000 [ 11.411698] dec0: 00000000 00000000 6e72656b 00006c65 00000000 00000000 00000000 00000000 [ 11.419858] dee0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 11.428018] df00: 00000000 0e391998 00000000 0000c610 d12de610 00000000 0062c620 ffffe000 [ 11.436180] df20: 000129d1 00000051 00000000 c039b228 00000000 d12d7afd d12d8e80 d12d2000 [ 11.444337] df40: 0000c610 d12de0e8 d12ddfa8 d12dab74 00009000 00009570 00003a2c 00009cae [ 11.452498] df60: 00000000 00000000 00000000 00003a1c 0000001e 0000001f 00000018 00000000 [ 11.460656] df80: 00000010 00000000 00000000 00000000 00000003 00000080 c0301204 cea0c000 [ 11.468817] dfa0: 00000080 c0301000 00000000 00000000 00620010 0000c610 000129d1 00000014 [ 11.476975] dfc0: 00000000 00000000 00000003 00000080 0000c610 00000000 b6fc1d20 00000000 [ 11.485137] dfe0: bef0ad14 bef0acf8 00011e14 b6f74c94 60000010 00620010 00000000 00000000 [ 11.493390] [] (ath10k_ce_alloc_pipe [ath10k_core]) from [] (ath10k_pci_alloc_pipes+0x94/0xc8 [ath10k_pci]) [ 11.501498] [] (ath10k_pci_alloc_pipes [ath10k_pci]) from [] (ath10k_pci_setup_resource+0xb8/0xf0 [ath10k_pci]) [ 11.512773] [] (ath10k_pci_setup_resource [ath10k_pci]) from [] (ath10k_ahb_probe+0x32c/0x670 [ath10k_pci]) [ 11.524566] [] (ath10k_ahb_probe [ath10k_pci]) from [] (platform_drv_probe+0x34/0x70) [ 11.536016] [] (platform_drv_probe) from [] (really_probe+0x1f0/0x358) [ 11.545729] [] (really_probe) from [] (device_driver_attach+0x58/0x60) [ 11.553886] [] (device_driver_attach) from [] (__driver_attach+0x58/0xcc) [ 11.562134] [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x8c) [ 11.570731] [] (bus_for_each_dev) from [] (bus_add_driver+0x1c8/0x1d8) [ 11.578886] [] (bus_add_driver) from [] (driver_register+0x74/0x108) [ 11.587060] [] (driver_register) from [] (ath10k_ahb_init+0x18/0x38 [ath10k_pci]) [ 11.595320] [] (ath10k_ahb_init [ath10k_pci]) from [] (init_module+0x3c/0x1000 [ath10k_pci]) [ 11.604432] [] (init_module [ath10k_pci]) from [] (do_one_initcall+0x84/0x1d8) [ 11.614657] [] (do_one_initcall) from [] (do_init_module+0x5c/0x228) [ 11.623421] [] (do_init_module) from [] (load_module+0x1fc8/0x224c) [ 11.631663] [] (load_module) from [] (sys_init_module+0x15c/0x17c) [ 11.639390] [] (sys_init_module) from [] (ret_fast_syscall+0x0/0x54) [ 11.647370] Exception stack(0xcea0dfa8 to 0xcea0dff0) [ 11.655615] dfa0: 00000000 00000000 00620010 0000c610 000129d1 00000014 [ 11.660569] dfc0: 00000000 00000000 00000003 00000080 0000c610 00000000 b6fc1d20 00000000 [ 11.668725] dfe0: bef0ad14 bef0acf8 00011e14 b6f74c94 [ 11.676886] Code: e1c321d4 e0433002 e0232397 e5843014 (e5953000) [ 11.681958] ---[ end trace 8f35917de2e76854 ]--- Fixes: 521fc37be3d8 ("ath10k: Avoid override CE5 configuration for QCA99X0 chipsets") Reported-by: Stefan Lippers-Hollmann [ipq40xx/ map-ac2200] Signed-off-by: Hauke Mehrtens Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200714205802.17688-1-hauke@hauke-m.de --- drivers/net/wireless/ath/ath10k/ahb.c | 2 +- drivers/net/wireless/ath/ath10k/pci.c | 78 +++++++++++++-------------- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index 342a7e58018acd..05a61975c83f4b 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -820,7 +820,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) ath10k_ahb_release_irq_legacy(ar); err_free_pipes: - ath10k_pci_free_pipes(ar); + ath10k_pci_release_resource(ar); err_resource_deinit: ath10k_ahb_resource_deinit(ar); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 1d941d53fdc9ff..cfde7791291ab3 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3473,6 +3473,28 @@ int ath10k_pci_setup_resource(struct ath10k *ar) timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0); + ar_pci->attr = kmemdup(pci_host_ce_config_wlan, + sizeof(pci_host_ce_config_wlan), + GFP_KERNEL); + if (!ar_pci->attr) + return -ENOMEM; + + ar_pci->pipe_config = kmemdup(pci_target_ce_config_wlan, + sizeof(pci_target_ce_config_wlan), + GFP_KERNEL); + if (!ar_pci->pipe_config) { + ret = -ENOMEM; + goto err_free_attr; + } + + ar_pci->serv_to_pipe = kmemdup(pci_target_service_to_ce_map_wlan, + sizeof(pci_target_service_to_ce_map_wlan), + GFP_KERNEL); + if (!ar_pci->serv_to_pipe) { + ret = -ENOMEM; + goto err_free_pipe_config; + } + if (QCA_REV_6174(ar) || QCA_REV_9377(ar)) ath10k_pci_override_ce_config(ar); @@ -3480,18 +3502,31 @@ int ath10k_pci_setup_resource(struct ath10k *ar) if (ret) { ath10k_err(ar, "failed to allocate copy engine pipes: %d\n", ret); - return ret; + goto err_free_serv_to_pipe; } return 0; + +err_free_serv_to_pipe: + kfree(ar_pci->serv_to_pipe); +err_free_pipe_config: + kfree(ar_pci->pipe_config); +err_free_attr: + kfree(ar_pci->attr); + return ret; } void ath10k_pci_release_resource(struct ath10k *ar) { + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + ath10k_pci_rx_retry_sync(ar); netif_napi_del(&ar->napi); ath10k_pci_ce_deinit(ar); ath10k_pci_free_pipes(ar); + kfree(ar_pci->attr); + kfree(ar_pci->pipe_config); + kfree(ar_pci->serv_to_pipe); } static const struct ath10k_bus_ops ath10k_pci_bus_ops = { @@ -3601,30 +3636,6 @@ static int ath10k_pci_probe(struct pci_dev *pdev, timer_setup(&ar_pci->ps_timer, ath10k_pci_ps_timer, 0); - ar_pci->attr = kmemdup(pci_host_ce_config_wlan, - sizeof(pci_host_ce_config_wlan), - GFP_KERNEL); - if (!ar_pci->attr) { - ret = -ENOMEM; - goto err_free; - } - - ar_pci->pipe_config = kmemdup(pci_target_ce_config_wlan, - sizeof(pci_target_ce_config_wlan), - GFP_KERNEL); - if (!ar_pci->pipe_config) { - ret = -ENOMEM; - goto err_free; - } - - ar_pci->serv_to_pipe = kmemdup(pci_target_service_to_ce_map_wlan, - sizeof(pci_target_service_to_ce_map_wlan), - GFP_KERNEL); - if (!ar_pci->serv_to_pipe) { - ret = -ENOMEM; - goto err_free; - } - ret = ath10k_pci_setup_resource(ar); if (ret) { ath10k_err(ar, "failed to setup resource: %d\n", ret); @@ -3705,10 +3716,9 @@ static int ath10k_pci_probe(struct pci_dev *pdev, err_free_irq: ath10k_pci_free_irq(ar); - ath10k_pci_rx_retry_sync(ar); err_deinit_irq: - ath10k_pci_deinit_irq(ar); + ath10k_pci_release_resource(ar); err_sleep: ath10k_pci_sleep_sync(ar); @@ -3720,29 +3730,18 @@ static int ath10k_pci_probe(struct pci_dev *pdev, err_core_destroy: ath10k_core_destroy(ar); -err_free: - kfree(ar_pci->attr); - kfree(ar_pci->pipe_config); - kfree(ar_pci->serv_to_pipe); - return ret; } static void ath10k_pci_remove(struct pci_dev *pdev) { struct ath10k *ar = pci_get_drvdata(pdev); - struct ath10k_pci *ar_pci; ath10k_dbg(ar, ATH10K_DBG_PCI, "pci remove\n"); if (!ar) return; - ar_pci = ath10k_pci_priv(ar); - - if (!ar_pci) - return; - ath10k_core_unregister(ar); ath10k_pci_free_irq(ar); ath10k_pci_deinit_irq(ar); @@ -3750,9 +3749,6 @@ static void ath10k_pci_remove(struct pci_dev *pdev) ath10k_pci_sleep_sync(ar); ath10k_pci_release(ar); ath10k_core_destroy(ar); - kfree(ar_pci->attr); - kfree(ar_pci->pipe_config); - kfree(ar_pci->serv_to_pipe); } MODULE_DEVICE_TABLE(pci, ath10k_pci_id_table); From 24a63fe6d45d6527db5ab87bcd1da6921f10e89e Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Mon, 20 Jul 2020 15:18:43 +0800 Subject: [PATCH 064/131] net: bcmgenet: fix error returns in bcmgenet_probe() The driver forgets to call clk_disable_unprepare() in error path after a success calling for clk_prepare_enable(). Fix to goto err_clk_disable if clk_prepare_enable() is successful. Fixes: 99d55638d4b0 ("net: bcmgenet: enable NETIF_F_HIGHDMA flag") Signed-off-by: Zhang Changzhong Acked-by: Doug Berger Acked-by: Florian fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 368e05b16ae9e9..903811e843d223 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3988,7 +3988,7 @@ static int bcmgenet_probe(struct platform_device *pdev) if (err) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) - goto err; + goto err_clk_disable; /* Mii wait queue */ init_waitqueue_head(&priv->wq); From 53a92889ec0e6d968a7f4098a16031a672b6fd51 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Mon, 20 Jul 2020 17:36:34 +0800 Subject: [PATCH 065/131] net: bcmgenet: add missed clk_disable_unprepare in bcmgenet_probe The driver forgets to call clk_disable_unprepare() in error path after a success calling for clk_prepare_enable(). Fix to goto err_clk_disable if clk_prepare_enable() is successful. Fixes: c80d36ff63a5 ("net: bcmgenet: Use devm_clk_get_optional() to get the clocks") Signed-off-by: Zhang Changzhong Acked-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 903811e843d223..e471b14fc6e98c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4000,14 +4000,14 @@ static int bcmgenet_probe(struct platform_device *pdev) if (IS_ERR(priv->clk_wol)) { dev_dbg(&priv->pdev->dev, "failed to get enet-wol clock\n"); err = PTR_ERR(priv->clk_wol); - goto err; + goto err_clk_disable; } priv->clk_eee = devm_clk_get_optional(&priv->pdev->dev, "enet-eee"); if (IS_ERR(priv->clk_eee)) { dev_dbg(&priv->pdev->dev, "failed to get enet-eee clock\n"); err = PTR_ERR(priv->clk_eee); - goto err; + goto err_clk_disable; } /* If this is an internal GPHY, power it on now, before UniMAC is From 639f181f0ee20d3249dbc55f740f0167267180f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Jul 2020 12:41:46 +0100 Subject: [PATCH 066/131] rxrpc: Fix sendmsg() returning EPIPE due to recvmsg() returning ENODATA rxrpc_sendmsg() returns EPIPE if there's an outstanding error, such as if rxrpc_recvmsg() indicating ENODATA if there's nothing for it to read. Change rxrpc_recvmsg() to return EAGAIN instead if there's nothing to read as this particular error doesn't get stored in ->sk_err by the networking core. Also change rxrpc_sendmsg() so that it doesn't fail with delayed receive errors (there's no way for it to report which call, if any, the error was caused by). Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/recvmsg.c | 2 +- net/rxrpc/sendmsg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 2989742a4aa118..490b1927215c78 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -543,7 +543,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, list_empty(&rx->recvmsg_q) && rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); - return -ENODATA; + return -EAGAIN; } if (list_empty(&rx->recvmsg_q)) { diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1304b8608f56e4..03a30d014bb6b4 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -304,7 +304,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + if (sk->sk_shutdown & SEND_SHUTDOWN) return -EPIPE; more = msg->msg_flags & MSG_MORE; From 2bced6aefa3d0347e11efc610759e1317bfca7a7 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 20 Jul 2020 16:24:28 +0200 Subject: [PATCH 067/131] net/smc: put slot when connection is killed To get a send slot smc_wr_tx_get_free_slot() is called, which might wait for a free slot. When smc_wr_tx_get_free_slot() returns there is a check if the connection was killed in the meantime. In that case don't only return an error, but also put back the free slot. Fixes: b290098092e4 ("net/smc: cancel send and receive for terminated socket") Reviewed-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a47e8855e04545..ce468ff62a1915 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -66,9 +66,13 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); - if (conn->killed) + if (conn->killed) { /* abnormal termination */ + if (!rc) + smc_wr_tx_put_slot(link, + (struct smc_wr_tx_pend_priv *)pend); rc = -EPIPE; + } return rc; } From a9e445029570ab691a44389d68b9c544338586b5 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 20 Jul 2020 16:24:29 +0200 Subject: [PATCH 068/131] net/smc: fix dmb buffer shortage There is a current limit of 1920 registered dmb buffers per ISM device for smc-d. One link group can contain 255 connections, each connection is using one dmb buffer. When the connection is closed then the registered buffer is held in a queue and is reused by the next connection. When a link group is 'full' then another link group is created and uses an own buffer pool. The link groups are added to a list using list_add() which puts a new link group to the first position in the list. In the situation that many connections are opened (>1920) and a few of them stay open while others are closed quickly we end up with at least 8 link groups. For a new connection a matching link group is looked up, iterating over the list of link groups. The trailing 7 link groups all have registered dmb buffers which could be reused, while the first link group has only a few dmb buffers and then hit the 1920 limit. Because the first link group is not full (255 connection limit not reached) it is chosen and finally the connection falls back to TCP because there is no dmb buffer available in this link group. There are multiple ways to fix that: using list_add_tail() allows to scan older link groups first for free buffers which ensures that buffers are reused first. This fixes the problem for smc-r link groups as well. For smc-d there is an even better way to address this problem because smc-d does not have the 255 connections per link group limit. So fix the problem for smc-d by allowing large link groups. Fixes: c6ba7c9ba43d ("net/smc: add base infrastructure for SMC-D and ISM") Reviewed-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index ca3dc6af73af0a..f82a2e59991719 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -444,7 +444,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } smc->conn.lgr = lgr; spin_lock_bh(lgr_lock); - list_add(&lgr->list, lgr_list); + list_add_tail(&lgr->list, lgr_list); spin_unlock_bh(lgr_lock); return 0; @@ -1311,7 +1311,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && lgr->vlan_id == ini->vlan_id && - (role == SMC_CLNT || + (role == SMC_CLNT || ini->is_smcd || lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; From 6790711f8ac5faabc43237c0d05d93db431a1ecc Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 20 Jul 2020 22:28:29 +0800 Subject: [PATCH 069/131] dpaa_eth: Fix one possible memleak in dpaa_eth_probe When dma_coerce_mask_and_coherent() fails, the alloced netdev need to be freed. Fixes: 060ad66f9795 ("dpaa_eth: change DMA device") Signed-off-by: Liu Jian Acked-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 2972244e6eb0d6..43570f4911ea12 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2938,7 +2938,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) DMA_BIT_MASK(40)); if (err) { netdev_err(net_dev, "dma_coerce_mask_and_coherent() failed\n"); - return err; + goto free_netdev; } /* If fsl_fm_max_frm is set to a higher value than the all-common 1500, From 5dbaeb87f2b309936be0aeae00cbc9e7f20ab296 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 20 Jul 2020 22:31:49 +0800 Subject: [PATCH 070/131] mlxsw: destroy workqueue when trap_register in mlxsw_emad_init When mlxsw_core_trap_register fails in mlxsw_emad_init, destroy_workqueue() shouled be called to destroy mlxsw_core->emad_wq. Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") Signed-off-by: Liu Jian Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e9ccd333f61dd8..d6d6fe64887b3c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -710,7 +710,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); if (err) - return err; + goto err_trap_register; err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core); if (err) @@ -722,6 +722,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) err_emad_trap_set: mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); +err_trap_register: destroy_workqueue(mlxsw_core->emad_wq); return err; } From 34b85adad831547cdd349ce9a05bbe751f9f4d46 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 20 Jul 2020 17:46:58 +0200 Subject: [PATCH 071/131] Documentation: bareudp: update iproute2 sample commands bareudp.rst was written before iproute2 gained support for this new type of tunnel. Therefore, the sample command lines didn't match the final iproute2 implementation. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- Documentation/networking/bareudp.rst | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst index 465a8b251bfeca..ff406563ea8875 100644 --- a/Documentation/networking/bareudp.rst +++ b/Documentation/networking/bareudp.rst @@ -26,7 +26,7 @@ Usage 1) Device creation & deletion - a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847. + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls_uc This creates a bareudp tunnel device which tunnels L3 traffic with ethertype 0x8847 (MPLS traffic). The destination port of the UDP header will be set to @@ -34,14 +34,21 @@ Usage b) ip link delete bareudp0 -2) Device creation with multiple proto mode enabled +2) Device creation with multiproto mode enabled -There are two ways to create a bareudp device for MPLS & IP with multiproto mode -enabled. +The multiproto mode allows bareudp tunnels to handle several protocols of the +same family. It is currently only available for IP and MPLS. This mode has to +be enabled explicitly with the "multiproto" flag. - a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847 multiproto + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype ipv4 multiproto - b) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls + For an IPv4 tunnel the multiproto mode allows the tunnel to also handle + IPv6. + + b) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls_uc multiproto + + For MPLS, the multiproto mode allows the tunnel to handle both unicast + and multicast MPLS packets. 3) Device Usage From 5d93518e06b2102253d465e523f16bd4cb5ce859 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Mon, 20 Jul 2020 12:43:27 -0400 Subject: [PATCH 072/131] net: hsr: check for return value of skb_put_padto() skb_put_padto() can fail. So check for return type and return NULL for skb. Caller checks for skb and acts correctly if it is NULL. Fixes: 6d6148bc78d2 ("net: hsr: fix incorrect lsdu size in the tag of HSR frames for small frames") Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_forward.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index e42fd356f07355..1ea17752fffc9b 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -120,15 +120,17 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, return skb_clone(frame->skb_std, GFP_ATOMIC); } -static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, - struct hsr_port *port, u8 proto_version) +static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, + struct hsr_frame_info *frame, + struct hsr_port *port, u8 proto_version) { struct hsr_ethhdr *hsr_ethhdr; int lane_id; int lsdu_size; /* pad to minimum packet size which is 60 + 6 (HSR tag) */ - skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); + if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) + return NULL; if (port->type == HSR_PT_SLAVE_A) lane_id = 0; @@ -147,6 +149,8 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; hsr_ethhdr->ethhdr.h_proto = htons(proto_version ? ETH_P_HSR : ETH_P_PRP); + + return skb; } static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, @@ -175,9 +179,10 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, memmove(dst, src, movelen); skb_reset_mac_header(skb); - hsr_fill_tag(skb, frame, port, port->hsr->prot_version); - - return skb; + /* skb_put_padto free skb on error and hsr_fill_tag returns NULL in + * that case + */ + return hsr_fill_tag(skb, frame, port, port->hsr->prot_version); } /* If the original frame was an HSR tagged frame, just clone it to be sent From f85ae16f924f92a370b81b4e77862c1c59882fce Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 20 Jul 2020 16:00:13 -0700 Subject: [PATCH 073/131] ionic: use offset for ethtool regs data Use an offset to write the second half of the regs data into the second half of the buffer instead of overwriting the first half. Fixes: 4d03e00a2140 ("ionic: Add initial ethtool support") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index e03ea9b18f95d5..095561924bdc13 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -103,15 +103,18 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { struct ionic_lif *lif = netdev_priv(netdev); + unsigned int offset; unsigned int size; regs->version = IONIC_DEV_CMD_REG_VERSION; + offset = 0; size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32); - memcpy_fromio(p, lif->ionic->idev.dev_info_regs->words, size); + memcpy_fromio(p + offset, lif->ionic->idev.dev_info_regs->words, size); + offset += size; size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32); - memcpy_fromio(p, lif->ionic->idev.dev_cmd_regs->words, size); + memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size); } static int ionic_get_link_ksettings(struct net_device *netdev, From cbec2153a9a68d011454960ba84887e46e40b37d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 20 Jul 2020 16:00:14 -0700 Subject: [PATCH 074/131] ionic: fix up filter locks and debug msgs Add in a couple of forgotten spinlocks and fix up some of the debug messages around filter management. Fixes: c1e329ebec8d ("ionic: Add management of rx filters") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 17 +++++++---------- .../ethernet/pensando/ionic/ionic_rx_filter.c | 5 +++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f49486b6d04d26..41e86d6b76b60f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -863,8 +863,7 @@ static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) if (f) return 0; - netdev_dbg(lif->netdev, "rx_filter add ADDR %pM (id %d)\n", addr, - ctx.comp.rx_filter_add.filter_id); + netdev_dbg(lif->netdev, "rx_filter add ADDR %pM\n", addr); memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN); err = ionic_adminq_post_wait(lif, &ctx); @@ -893,6 +892,9 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return -ENOENT; } + netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", + addr, f->filter_id); + ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); ionic_rx_filter_free(lif, f); spin_unlock_bh(&lif->rx_filters.lock); @@ -901,9 +903,6 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) if (err && err != -EEXIST) return err; - netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", addr, - ctx.cmd.rx_filter_del.filter_id); - return 0; } @@ -1351,13 +1350,11 @@ static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, }; int err; + netdev_dbg(netdev, "rx_filter add VLAN %d\n", vid); err = ionic_adminq_post_wait(lif, &ctx); if (err) return err; - netdev_dbg(netdev, "rx_filter add VLAN %d (id %d)\n", vid, - ctx.comp.rx_filter_add.filter_id); - return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx); } @@ -1382,8 +1379,8 @@ static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, return -ENOENT; } - netdev_dbg(netdev, "rx_filter del VLAN %d (id %d)\n", vid, - le32_to_cpu(ctx.cmd.rx_filter_del.filter_id)); + netdev_dbg(netdev, "rx_filter del VLAN %d (id %d)\n", + vid, f->filter_id); ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id); ionic_rx_filter_free(lif, f); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 80eeb7696e0144..fb9d828812bd21 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -69,10 +69,12 @@ int ionic_rx_filters_init(struct ionic_lif *lif) spin_lock_init(&lif->rx_filters.lock); + spin_lock_bh(&lif->rx_filters.lock); for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) { INIT_HLIST_HEAD(&lif->rx_filters.by_hash[i]); INIT_HLIST_HEAD(&lif->rx_filters.by_id[i]); } + spin_unlock_bh(&lif->rx_filters.lock); return 0; } @@ -84,11 +86,13 @@ void ionic_rx_filters_deinit(struct ionic_lif *lif) struct hlist_node *tmp; unsigned int i; + spin_lock_bh(&lif->rx_filters.lock); for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) { head = &lif->rx_filters.by_id[i]; hlist_for_each_entry_safe(f, tmp, head, by_id) ionic_rx_filter_free(lif, f); } + spin_unlock_bh(&lif->rx_filters.lock); } int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index, @@ -124,6 +128,7 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index, f->filter_id = le32_to_cpu(ctx->comp.rx_filter_add.filter_id); f->rxq_index = rxq_index; memcpy(&f->cmd, ac, sizeof(f->cmd)); + netdev_dbg(lif->netdev, "rx_filter add filter_id %d\n", f->filter_id); INIT_HLIST_NODE(&f->by_hash); INIT_HLIST_NODE(&f->by_id); From cc4428c4de8c31f12e4690d0409e0432fe05702f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 20 Jul 2020 16:00:15 -0700 Subject: [PATCH 075/131] ionic: update filter id after replay When we replay the rx filters after a fw-upgrade we get new filter_id values from the FW, which we need to save and update in our local filter list. This allows us to delete the filters with the correct filter_id when we're done. Fixes: 7e4d47596b68 ("ionic: replay filters after fw upgrade") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../ethernet/pensando/ionic/ionic_rx_filter.c | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index fb9d828812bd21..cd0076fc3044e2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -21,13 +21,16 @@ void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f) void ionic_rx_filter_replay(struct ionic_lif *lif) { struct ionic_rx_filter_add_cmd *ac; + struct hlist_head new_id_list; struct ionic_admin_ctx ctx; struct ionic_rx_filter *f; struct hlist_head *head; struct hlist_node *tmp; + unsigned int key; unsigned int i; int err; + INIT_HLIST_HEAD(&new_id_list); ac = &ctx.cmd.rx_filter_add; for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) { @@ -58,9 +61,30 @@ void ionic_rx_filter_replay(struct ionic_lif *lif) ac->mac.addr); break; } + spin_lock_bh(&lif->rx_filters.lock); + ionic_rx_filter_free(lif, f); + spin_unlock_bh(&lif->rx_filters.lock); + + continue; } + + /* remove from old id list, save new id in tmp list */ + spin_lock_bh(&lif->rx_filters.lock); + hlist_del(&f->by_id); + spin_unlock_bh(&lif->rx_filters.lock); + f->filter_id = le32_to_cpu(ctx.comp.rx_filter_add.filter_id); + hlist_add_head(&f->by_id, &new_id_list); } } + + /* rebuild the by_id hash lists with the new filter ids */ + spin_lock_bh(&lif->rx_filters.lock); + hlist_for_each_entry_safe(f, tmp, &new_id_list, by_id) { + key = f->filter_id & IONIC_RX_FILTER_HLISTS_MASK; + head = &lif->rx_filters.by_id[key]; + hlist_add_head(&f->by_id, head); + } + spin_unlock_bh(&lif->rx_filters.lock); } int ionic_rx_filters_init(struct ionic_lif *lif) From bdff46665ee655600d0fe2a0e5f62ec7853d3b22 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 20 Jul 2020 16:00:16 -0700 Subject: [PATCH 076/131] ionic: keep rss hash after fw update Make sure the RSS hash key is kept across a fw update by not de-initing it when an update is happening. Fixes: c672412f6172 ("ionic: remove lifs on fw reset") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 41e86d6b76b60f..ddb9ad5b294cd9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2277,11 +2277,10 @@ static void ionic_lif_deinit(struct ionic_lif *lif) cancel_work_sync(&lif->deferred.work); cancel_work_sync(&lif->tx_timeout_work); ionic_rx_filters_deinit(lif); + if (lif->netdev->features & NETIF_F_RXHASH) + ionic_lif_rss_deinit(lif); } - if (lif->netdev->features & NETIF_F_RXHASH) - ionic_lif_rss_deinit(lif); - napi_disable(&lif->adminqcq->napi); ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); From 0925e9db4dc86daf666d9a3d53c7db14ac6d5d00 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 20 Jul 2020 16:00:17 -0700 Subject: [PATCH 077/131] ionic: use mutex to protect queue operations The ionic_wait_on_bit_lock() was a open-coded mutex knock-off used only for protecting the queue reset operations, and there was no reason not to use the real thing. We can use the lock more correctly and to better protect the queue stop and start operations from cross threading. We can also remove a useless and expensive bit operation from the Rx path. This fixes a case found where the link_status_check from a link flap could run into an MTU change and cause a crash. Fixes: beead698b173 ("ionic: Add the basic NDO callbacks for netdev support") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_lif.c | 28 +++++++++++-------- .../net/ethernet/pensando/ionic/ionic_lif.h | 8 +----- .../net/ethernet/pensando/ionic/ionic_txrx.c | 6 ---- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ddb9ad5b294cd9..5fd31ba5693729 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -96,8 +96,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) u16 link_status; bool link_up; - if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state) || - test_bit(IONIC_LIF_F_QUEUE_RESET, lif->state)) + if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state)) return; link_status = le16_to_cpu(lif->info->status.link_status); @@ -114,16 +113,22 @@ static void ionic_link_status_check(struct ionic_lif *lif) netif_carrier_on(netdev); } - if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) + if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { + mutex_lock(&lif->queue_lock); ionic_start_queues(lif); + mutex_unlock(&lif->queue_lock); + } } else { if (netif_carrier_ok(netdev)) { netdev_info(netdev, "Link down\n"); netif_carrier_off(netdev); } - if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) + if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { + mutex_lock(&lif->queue_lock); ionic_stop_queues(lif); + mutex_unlock(&lif->queue_lock); + } } clear_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state); @@ -1990,16 +1995,13 @@ int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg) bool running; int err = 0; - err = ionic_wait_for_bit(lif, IONIC_LIF_F_QUEUE_RESET); - if (err) - return err; - + mutex_lock(&lif->queue_lock); running = netif_running(lif->netdev); if (running) { netif_device_detach(lif->netdev); err = ionic_stop(lif->netdev); if (err) - goto reset_out; + return err; } if (cb) @@ -2009,9 +2011,7 @@ int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg) err = ionic_open(lif->netdev); netif_device_attach(lif->netdev); } - -reset_out: - clear_bit(IONIC_LIF_F_QUEUE_RESET, lif->state); + mutex_unlock(&lif->queue_lock); return err; } @@ -2158,7 +2158,9 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) if (test_bit(IONIC_LIF_F_UP, lif->state)) { dev_info(ionic->dev, "Surprise FW stop, stopping queues\n"); + mutex_lock(&lif->queue_lock); ionic_stop_queues(lif); + mutex_unlock(&lif->queue_lock); } if (netif_running(lif->netdev)) { @@ -2285,6 +2287,7 @@ static void ionic_lif_deinit(struct ionic_lif *lif) ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); + mutex_destroy(&lif->queue_lock); ionic_lif_reset(lif); } @@ -2461,6 +2464,7 @@ static int ionic_lif_init(struct ionic_lif *lif) return err; lif->hw_index = le16_to_cpu(comp.hw_index); + mutex_init(&lif->queue_lock); /* now that we have the hw_index we can figure out our doorbell page */ lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index ed126dd74e01fd..8dc2c5d77424cb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -135,7 +135,6 @@ enum ionic_lif_state_flags { IONIC_LIF_F_SW_DEBUG_STATS, IONIC_LIF_F_UP, IONIC_LIF_F_LINK_CHECK_REQUESTED, - IONIC_LIF_F_QUEUE_RESET, IONIC_LIF_F_FW_RESET, /* leave this as last */ @@ -165,6 +164,7 @@ struct ionic_lif { unsigned int hw_index; unsigned int kern_pid; u64 __iomem *kern_dbpage; + struct mutex queue_lock; /* lock for queue structures */ spinlock_t adminq_lock; /* lock for AdminQ operations */ struct ionic_qcq *adminqcq; struct ionic_qcq *notifyqcq; @@ -213,12 +213,6 @@ struct ionic_lif { #define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q) #define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q) -/* return 0 if successfully set the bit, else non-zero */ -static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname) -{ - return wait_on_bit_lock(lif->state, bitname, TASK_INTERRUPTIBLE); -} - static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) { u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index b7f900c11834ec..85eb8f276a3701 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -161,12 +161,6 @@ static void ionic_rx_clean(struct ionic_queue *q, return; } - /* no packet processing while resetting */ - if (unlikely(test_bit(IONIC_LIF_F_QUEUE_RESET, q->lif->state))) { - stats->dropped++; - return; - } - stats->pkts++; stats->bytes += le16_to_cpu(comp->len); From c271042eb6a031d1333cf57422ec1d20726901ab Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Sat, 18 Jul 2020 00:31:42 +0530 Subject: [PATCH 078/131] crypto/chtls: fix tls alert messages corrupted by tls data When tls data skb is pending for Tx and tls alert comes , It is wrongly overwrite the record type of tls data to tls alert record type. fix the issue correcting it. v1->v2: - Correct submission tree. - Add fixes tag. Fixes: 6919a8264a32 ("Crypto/chtls: add/delete TLS header in driver") Signed-off-by: Vinay Kumar Yadav Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chtls/chtls_io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index e1401d9cc756ce..2e9acae1cba3b7 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1052,14 +1052,15 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) &record_type); if (err) goto out_err; + + /* Avoid appending tls handshake, alert to tls data */ + if (skb) + tx_skb_finalize(skb); } recordsz = size; csk->tlshws.txleft = recordsz; csk->tlshws.type = record_type; - - if (skb) - ULP_SKB_CB(skb)->ulp.tls.type = record_type; } if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || From 30d9e5057ac01dd6982adace221bfcafb1f8186e Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Sat, 18 Jul 2020 00:41:07 +0530 Subject: [PATCH 079/131] crypto/chtls: correct net_device reference count ip_dev_find() call holds net_device reference which is not needed, use __ip_dev_find() which does not hold reference. v1->v2: - Correct submission tree. - Add fixes tag. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chtls/chtls_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index f200fae6f7cb55..54093115eb95d9 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -102,7 +102,7 @@ static struct net_device *chtls_find_netdev(struct chtls_dev *cdev, case PF_INET: if (likely(!inet_sk(sk)->inet_rcv_saddr)) return ndev; - ndev = ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr); + ndev = __ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr, false); break; #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: From 1e8fd3a97f2d83a7197876ceb4f37b4c2b00a0f3 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 18 Jul 2020 00:31:49 -0500 Subject: [PATCH 080/131] nfc: s3fwrn5: add missing release on skb in s3fwrn5_recv_frame The implementation of s3fwrn5_recv_frame() is supposed to consume skb on all execution paths. Release skb before returning -ENODEV. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 91d4d5b28a7d9e..ba6c486d646597 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -198,6 +198,7 @@ int s3fwrn5_recv_frame(struct nci_dev *ndev, struct sk_buff *skb, case S3FWRN5_MODE_FW: return s3fwrn5_fw_recv_frame(ndev, skb); default: + kfree_skb(skb); return -ENODEV; } } From ae372cb1750f6c95370f92fe5f5620e0954663ba Mon Sep 17 00:00:00 2001 From: wenxu Date: Sun, 19 Jul 2020 20:30:37 +0800 Subject: [PATCH 081/131] net/sched: act_ct: fix restore the qdisc_skb_cb after defrag The fragment packets do defrag in tcf_ct_handle_fragments will clear the skb->cb which make the qdisc_skb_cb clear too. So the qdsic_skb_cb should be store before defrag and restore after that. It also update the pkt_len after all the fragments finish the defrag to one packet and make the following actions counter correct. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: wenxu Signed-off-by: David S. Miller --- net/sched/act_ct.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 67504aece9ae5d..5928efb6449c3b 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -673,9 +673,10 @@ static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) } static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, - u8 family, u16 zone) + u8 family, u16 zone, bool *defrag) { enum ip_conntrack_info ctinfo; + struct qdisc_skb_cb cb; struct nf_conn *ct; int err = 0; bool frag; @@ -693,6 +694,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); + cb = *qdisc_skb_cb(skb); if (family == NFPROTO_IPV4) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; @@ -703,6 +705,9 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, local_bh_enable(); if (err && err != -EINPROGRESS) goto out_free; + + if (!err) + *defrag = true; } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; @@ -711,12 +716,16 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, err = nf_ct_frag6_gather(net, skb, user); if (err && err != -EINPROGRESS) goto out_free; + + if (!err) + *defrag = true; #else err = -EOPNOTSUPP; goto out_free; #endif } + *qdisc_skb_cb(skb) = cb; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -914,6 +923,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, int nh_ofs, err, retval; struct tcf_ct_params *p; bool skip_add = false; + bool defrag = false; struct nf_conn *ct; u8 family; @@ -946,7 +956,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); - err = tcf_ct_handle_fragments(net, skb, family, p->zone); + err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); if (err == -EINPROGRESS) { retval = TC_ACT_STOLEN; goto out; @@ -1014,6 +1024,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, out: tcf_action_update_bstats(&c->common, skb); + if (defrag) + qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; drop: From befc113c56a76ae7be3986034a0e476d3385e265 Mon Sep 17 00:00:00 2001 From: Huang Guobin Date: Sun, 19 Jul 2020 21:46:14 -0400 Subject: [PATCH 082/131] net: ag71xx: add missed clk_disable_unprepare in error path of probe The ag71xx_mdio_probe() forgets to call clk_disable_unprepare() when of_reset_control_get_exclusive() failed. Add the missed call to fix it. Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Reported-by: Hulk Robot Signed-off-by: Huang Guobin Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/ag71xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 112edbd308230e..38cce66ef212a5 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -556,7 +556,8 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio"); if (IS_ERR(ag->mdio_reset)) { netif_err(ag, probe, ndev, "Failed to get reset mdio.\n"); - return PTR_ERR(ag->mdio_reset); + err = PTR_ERR(ag->mdio_reset); + goto mdio_err_put_clk; } mii_bus->name = "ag71xx_mdio"; From db44c60c45aa533c92394c778247cfe8bdd66134 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Mon, 20 Jul 2020 09:58:39 +0800 Subject: [PATCH 083/131] net: neterion: vxge: reduce stack usage in VXGE_COMPLETE_VPATH_TX Fix the warning: [-Werror=-Wframe-larger-than=] drivers/net/ethernet/neterion/vxge/vxge-main.c: In function'VXGE_COMPLETE_VPATH_TX.isra.37': drivers/net/ethernet/neterion/vxge/vxge-main.c:119:1: warning: the frame size of 1056 bytes is larger than 1024 bytes Dropping the NR_SKB_COMPLETED to 16 is appropriate that won't have much impact on performance and functionality. Signed-off-by: Bixuan Cui Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/neterion/vxge/vxge-main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 9b63574b620268..b5f1849fd28026 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -98,7 +98,7 @@ static inline void VXGE_COMPLETE_VPATH_TX(struct vxge_fifo *fifo) { struct sk_buff **skb_ptr = NULL; struct sk_buff **temp; -#define NR_SKB_COMPLETED 128 +#define NR_SKB_COMPLETED 16 struct sk_buff *completed[NR_SKB_COMPLETED]; int more; From 6ef9dcb78046b346b5508ca1659848b136a343c2 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 21 Jul 2020 08:57:05 +0700 Subject: [PATCH 084/131] tipc: allow to build NACK message in link timeout function Commit 02288248b051 ("tipc: eliminate gap indicator from ACK messages") eliminated sending of the 'gap' indicator in regular ACK messages and only allowed to build NACK message with enabled probe/probe_reply. However, necessary correction for building NACK message was missed in tipc_link_timeout() function. This leads to significant delay and link reset (due to retransmission failure) in lossy environment. This commit fixes it by setting the 'probe' flag to 'true' when the receive deferred queue is not empty. As a result, NACK message will be built to send back to another peer. Fixes: 02288248b051 ("tipc: eliminate gap indicator from ACK messages") Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 263d950e70e9ad..d40f8e5b768353 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -827,11 +827,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) state |= l->bc_rcvlink->rcv_unacked; state |= l->rcv_unacked; state |= !skb_queue_empty(&l->transmq); - state |= !skb_queue_empty(&l->deferdq); probe = mstate->probing; probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; + probe |= !skb_queue_empty(&l->deferdq); if (l->snd_nxt == l->checkpoint) { tipc_link_update_cwin(l, 0, 0); probe = true; From 4cb699d0447be8e0906539f93cbe41e19598ee5a Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Fri, 10 Jul 2020 10:40:54 +0800 Subject: [PATCH 085/131] riscv: kasan: use local_tlb_flush_all() to avoid uninitialized __sbi_rfence It fails to boot the v5.8-rc4 kernel with CONFIG_KASAN because kasan_init and kasan_early_init use uninitialized __sbi_rfence as executing the tlb_flush_all(). Actually, at this moment, only the CPU which is responsible for the system initialization enables the MMU. Other CPUs are parking at the .Lsecondary_start. Hence the tlb_flush_all() is able to be replaced by local_tlb_flush_all() to avoid using uninitialized __sbi_rfence. Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 4a8b61806633d6..87b4ab3d3c77c3 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -44,7 +44,7 @@ asmlinkage void __init kasan_early_init(void) (__pa(((uintptr_t) kasan_early_shadow_pmd))), __pgprot(_PAGE_TABLE))); - flush_tlb_all(); + local_flush_tlb_all(); } static void __init populate(void *start, void *end) @@ -79,7 +79,7 @@ static void __init populate(void *start, void *end) pfn_pgd(PFN_DOWN(__pa(&pmd[offset])), __pgprot(_PAGE_TABLE))); - flush_tlb_all(); + local_flush_tlb_all(); memset(start, 0, end - start); } From 6cfcd5563b4fadbf49ba8fa481978e5e86d30322 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 13 Jul 2020 09:26:01 -0700 Subject: [PATCH 086/131] clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4 Carlos Hernandez reported that we now have a suspend and resume regresssion on am3 and am4 compared to the earlier kernels. While suspend and resume works with v5.8-rc3, we now get errors with rtcwake: pm33xx pm33xx: PM: Could not transition all powerdomains to target state ... rtcwake: write error This is because we now fail to idle the system timer clocks that the idle code checks and the error gets propagated to the rtcwake. Turns out there are several issues that need to be fixed: 1. Ignore no-idle and no-reset configured timers for the ti-sysc interconnect target driver as otherwise it will keep the system timer clocks enabled 2. Toggle the system timer functional clock for suspend for am3 and am4 (but not for clocksource on am3) 3. Only reconfigure type1 timers in dmtimer_systimer_disable() 4. Use of_machine_is_compatible() instead of of_device_is_compatible() for checking the SoC type Fixes: 52762fbd1c47 ("clocksource/drivers/timer-ti-dm: Add clockevent and clocksource support") Reported-by: Carlos Hernandez Signed-off-by: Tony Lindgren Tested-by: Carlos Hernandez Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200713162601.6829-1-tony@atomide.com --- drivers/bus/ti-sysc.c | 22 +++++++++++ drivers/clocksource/timer-ti-dm-systimer.c | 46 +++++++++++++++++----- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index bb54fb514e40fc..c6427d0bc94c98 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2864,6 +2864,24 @@ static int sysc_check_disabled_devices(struct sysc *ddata) return error; } +/* + * Ignore timers tagged with no-reset and no-idle. These are likely in use, + * for example by drivers/clocksource/timer-ti-dm-systimer.c. If more checks + * are needed, we could also look at the timer register configuration. + */ +static int sysc_check_active_timer(struct sysc *ddata) +{ + if (ddata->cap->type != TI_SYSC_OMAP2_TIMER && + ddata->cap->type != TI_SYSC_OMAP4_TIMER) + return 0; + + if ((ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) && + (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE)) + return -EBUSY; + + return 0; +} + static const struct of_device_id sysc_match_table[] = { { .compatible = "simple-bus", }, { /* sentinel */ }, @@ -2920,6 +2938,10 @@ static int sysc_probe(struct platform_device *pdev) if (error) return error; + error = sysc_check_active_timer(ddata); + if (error) + return error; + error = sysc_get_clocks(ddata); if (error) return error; diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index 6fd1f219a512ef..f6fd1c1cc527f9 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -19,7 +19,7 @@ /* For type1, set SYSC_OMAP2_CLOCKACTIVITY for fck off on idle, l4 clock on */ #define DMTIMER_TYPE1_ENABLE ((1 << 9) | (SYSC_IDLE_SMART << 3) | \ SYSC_OMAP2_ENAWAKEUP | SYSC_OMAP2_AUTOIDLE) - +#define DMTIMER_TYPE1_DISABLE (SYSC_OMAP2_SOFTRESET | SYSC_OMAP2_AUTOIDLE) #define DMTIMER_TYPE2_ENABLE (SYSC_IDLE_SMART_WKUP << 2) #define DMTIMER_RESET_WAIT 100000 @@ -44,6 +44,8 @@ struct dmtimer_systimer { u8 ctrl; u8 wakeup; u8 ifctrl; + struct clk *fck; + struct clk *ick; unsigned long rate; }; @@ -298,16 +300,20 @@ static void __init dmtimer_systimer_select_best(void) } /* Interface clocks are only available on some SoCs variants */ -static int __init dmtimer_systimer_init_clock(struct device_node *np, +static int __init dmtimer_systimer_init_clock(struct dmtimer_systimer *t, + struct device_node *np, const char *name, unsigned long *rate) { struct clk *clock; unsigned long r; + bool is_ick = false; int error; + is_ick = !strncmp(name, "ick", 3); + clock = of_clk_get_by_name(np, name); - if ((PTR_ERR(clock) == -EINVAL) && !strncmp(name, "ick", 3)) + if ((PTR_ERR(clock) == -EINVAL) && is_ick) return 0; else if (IS_ERR(clock)) return PTR_ERR(clock); @@ -320,6 +326,11 @@ static int __init dmtimer_systimer_init_clock(struct device_node *np, if (!r) return -ENODEV; + if (is_ick) + t->ick = clock; + else + t->fck = clock; + *rate = r; return 0; @@ -339,7 +350,10 @@ static void dmtimer_systimer_enable(struct dmtimer_systimer *t) static void dmtimer_systimer_disable(struct dmtimer_systimer *t) { - writel_relaxed(0, t->base + t->sysc); + if (!dmtimer_systimer_revision1(t)) + return; + + writel_relaxed(DMTIMER_TYPE1_DISABLE, t->base + t->sysc); } static int __init dmtimer_systimer_setup(struct device_node *np, @@ -366,13 +380,13 @@ static int __init dmtimer_systimer_setup(struct device_node *np, pr_err("%s: clock source init failed: %i\n", __func__, error); /* For ti-sysc, we have timer clocks at the parent module level */ - error = dmtimer_systimer_init_clock(np->parent, "fck", &rate); + error = dmtimer_systimer_init_clock(t, np->parent, "fck", &rate); if (error) goto err_unmap; t->rate = rate; - error = dmtimer_systimer_init_clock(np->parent, "ick", &rate); + error = dmtimer_systimer_init_clock(t, np->parent, "ick", &rate); if (error) goto err_unmap; @@ -496,12 +510,18 @@ static void omap_clockevent_idle(struct clock_event_device *evt) struct dmtimer_systimer *t = &clkevt->t; dmtimer_systimer_disable(t); + clk_disable(t->fck); } static void omap_clockevent_unidle(struct clock_event_device *evt) { struct dmtimer_clockevent *clkevt = to_dmtimer_clockevent(evt); struct dmtimer_systimer *t = &clkevt->t; + int error; + + error = clk_enable(t->fck); + if (error) + pr_err("could not enable timer fck on resume: %i\n", error); dmtimer_systimer_enable(t); writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->irq_ena); @@ -570,8 +590,8 @@ static int __init dmtimer_clockevent_init(struct device_node *np) 3, /* Timer internal resynch latency */ 0xffffffff); - if (of_device_is_compatible(np, "ti,am33xx") || - of_device_is_compatible(np, "ti,am43")) { + if (of_machine_is_compatible("ti,am33xx") || + of_machine_is_compatible("ti,am43")) { dev->suspend = omap_clockevent_idle; dev->resume = omap_clockevent_unidle; } @@ -616,12 +636,18 @@ static void dmtimer_clocksource_suspend(struct clocksource *cs) clksrc->loadval = readl_relaxed(t->base + t->counter); dmtimer_systimer_disable(t); + clk_disable(t->fck); } static void dmtimer_clocksource_resume(struct clocksource *cs) { struct dmtimer_clocksource *clksrc = to_dmtimer_clocksource(cs); struct dmtimer_systimer *t = &clksrc->t; + int error; + + error = clk_enable(t->fck); + if (error) + pr_err("could not enable timer fck on resume: %i\n", error); dmtimer_systimer_enable(t); writel_relaxed(clksrc->loadval, t->base + t->counter); @@ -653,8 +679,8 @@ static int __init dmtimer_clocksource_init(struct device_node *np) dev->mask = CLOCKSOURCE_MASK(32); dev->flags = CLOCK_SOURCE_IS_CONTINUOUS; - if (of_device_is_compatible(np, "ti,am33xx") || - of_device_is_compatible(np, "ti,am43")) { + /* Unlike for clockevent, legacy code sets suspend only for am4 */ + if (of_machine_is_compatible("ti,am43")) { dev->suspend = dmtimer_clocksource_suspend; dev->resume = dmtimer_clocksource_resume; } From f2b2c55e512879a05456eaf5de4d1ed2f7757509 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 21 Jul 2020 15:15:30 +0900 Subject: [PATCH 087/131] udp: Copy has_conns in reuseport_grow(). If an unconnected socket in a UDP reuseport group connect()s, has_conns is set to 1. Then, when a packet is received, udp[46]_lib_lookup2() scans all sockets in udp_hslot looking for the connected socket with the highest score. However, when the number of sockets bound to the port exceeds max_socks, reuseport_grow() resets has_conns to 0. It can cause udp[46]_lib_lookup2() to return without scanning all sockets, resulting in that packets sent to connected sockets may be distributed to unconnected sockets. Therefore, reuseport_grow() should copy has_conns. Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") CC: Willem de Bruijn Reviewed-by: Benjamin Herrenschmidt Signed-off-by: Kuniyuki Iwashima Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/sock_reuseport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index adcb3aea576d6d..bbdd3c7b6cb5b9 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -101,6 +101,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->prog = reuse->prog; more_reuse->reuseport_id = reuse->reuseport_id; more_reuse->bind_inany = reuse->bind_inany; + more_reuse->has_conns = reuse->has_conns; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); From efc6b6f6c3113e8b203b9debfb72d81e0f3dcace Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 21 Jul 2020 15:15:31 +0900 Subject: [PATCH 088/131] udp: Improve load balancing for SO_REUSEPORT. Currently, SO_REUSEPORT does not work well if connected sockets are in a UDP reuseport group. Then reuseport_has_conns() returns true and the result of reuseport_select_sock() is discarded. Also, unconnected sockets have the same score, hence only does the first unconnected socket in udp_hslot always receive all packets sent to unconnected sockets. So, the result of reuseport_select_sock() should be used for load balancing. The noteworthy point is that the unconnected sockets placed after connected sockets in sock_reuseport.socks will receive more packets than others because of the algorithm in reuseport_select_sock(). index | connected | reciprocal_scale | result --------------------------------------------- 0 | no | 20% | 40% 1 | no | 20% | 20% 2 | yes | 20% | 0% 3 | no | 20% | 40% 4 | yes | 20% | 0% If most of the sockets are connected, this can be a problem, but it still works better than now. Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") CC: Willem de Bruijn Reviewed-by: Benjamin Herrenschmidt Signed-off-by: Kuniyuki Iwashima Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/udp.c | 15 +++++++++------ net/ipv6/udp.c | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b7ebbcae4971c..99251d3c70d024 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -416,7 +416,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -426,17 +426,20 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } + + result = reuseport_result ? : sk; badness = score; - result = sk; } } return result; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7d4151747340f7..9503c87ac0b396 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } - result = sk; + + result = reuseport_result ? : sk; badness = score; } } From 015c5d5e6aa3523c758a70eb87b291cece2dbbb4 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 21 Jul 2020 15:23:12 +0900 Subject: [PATCH 089/131] net: ethernet: ravb: exit if re-initialization fails in tx timeout According to the report of [1], this driver is possible to cause the following error in ravb_tx_timeout_work(). ravb e6800000.ethernet ethernet: failed to switch device to config mode This error means that the hardware could not change the state from "Operation" to "Configuration" while some tx and/or rx queue are operating. After that, ravb_config() in ravb_dmac_init() will fail, and then any descriptors will be not allocaled anymore so that NULL pointer dereference happens after that on ravb_start_xmit(). To fix the issue, the ravb_tx_timeout_work() should check the return values of ravb_stop_dma() and ravb_dmac_init(). If ravb_stop_dma() fails, ravb_tx_timeout_work() re-enables TX and RX and just exits. If ravb_dmac_init() fails, just exits. [1] https://lore.kernel.org/linux-renesas-soc/20200518045452.2390-1-dirk.behme@de.bosch.com/ Reported-by: Dirk Behme Signed-off-by: Yoshihiro Shimoda Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 26 ++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index a442bcf64b9cd8..99f7aae102ce12 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1450,6 +1450,7 @@ static void ravb_tx_timeout_work(struct work_struct *work) struct ravb_private *priv = container_of(work, struct ravb_private, work); struct net_device *ndev = priv->ndev; + int error; netif_tx_stop_all_queues(ndev); @@ -1458,15 +1459,36 @@ static void ravb_tx_timeout_work(struct work_struct *work) ravb_ptp_stop(ndev); /* Wait for DMA stopping */ - ravb_stop_dma(ndev); + if (ravb_stop_dma(ndev)) { + /* If ravb_stop_dma() fails, the hardware is still operating + * for TX and/or RX. So, this should not call the following + * functions because ravb_dmac_init() is possible to fail too. + * Also, this should not retry ravb_stop_dma() again and again + * here because it's possible to wait forever. So, this just + * re-enables the TX and RX and skip the following + * re-initialization procedure. + */ + ravb_rcv_snd_enable(ndev); + goto out; + } ravb_ring_free(ndev, RAVB_BE); ravb_ring_free(ndev, RAVB_NC); /* Device init */ - ravb_dmac_init(ndev); + error = ravb_dmac_init(ndev); + if (error) { + /* If ravb_dmac_init() fails, descriptors are freed. So, this + * should return here to avoid re-enabling the TX and RX in + * ravb_emac_init(). + */ + netdev_err(ndev, "%s: ravb_dmac_init() failed, error %d\n", + __func__, error); + return; + } ravb_emac_init(ndev); +out: /* Initialise PTP Clock driver */ if (priv->chip_id == RCAR_GEN2) ravb_ptp_init(ndev, priv->pdev); From 9bb5fbea59f36a589ef886292549ca4052fe676c Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 21 Jul 2020 15:02:57 +0800 Subject: [PATCH 090/131] net-sysfs: add a newline when printing 'tx_timeout' by sysfs When I cat 'tx_timeout' by sysfs, it displays as follows. It's better to add a newline for easy reading. root@syzkaller:~# cat /sys/devices/virtual/net/lo/queues/tx-0/tx_timeout 0root@syzkaller:~# Signed-off-by: Xiongfeng Wang Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e353b822bb1574..7bd6440c63bf57 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1108,7 +1108,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) trans_timeout = queue->trans_timeout; spin_unlock_irq(&queue->_xmit_lock); - return sprintf(buf, "%lu", trans_timeout); + return sprintf(buf, fmt_ulong, trans_timeout); } static unsigned int get_netdev_queue_index(struct netdev_queue *queue) From b0a422772fec29811e293c7c0e6f991c0fd9241d Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 21 Jul 2020 17:11:44 +0800 Subject: [PATCH 091/131] net: udp: Fix wrong clean up for IS_UDPLITE macro We can't use IS_UDPLITE to replace udp_sk->pcflag when UDPLITE_RECV_CC is checked. Fixes: b2bf1e2659b1 ("[UDP]: Clean up for IS_UDPLITE macro") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 99251d3c70d024..4077d589b72efe 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2054,7 +2054,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { /* * MIB statistics other than incrementing the error count are diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9503c87ac0b396..a8d74f44056a68 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -646,7 +646,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", From 0ec3b6a7c026e8f404f76d6dda5dae8d262312a0 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 21 Jul 2020 19:03:51 +0800 Subject: [PATCH 092/131] net: hns3: fix for not unmapping TX buffer correctly When a big TX buffer is sent using multi BD, the driver maps the whole TX buffer, and unmaps it using info in desc_cb corresponding to each BD, but only the info in the desc_cb of first BD is correct, other info in desc_cb is wrong, which causes TX unmapping problem when SMMU is on. Only set the mapping and freeing info in the desc_cb of first BD to fix this problem, because the TX buffer only need to be unmapped and freed once. Fixes: 1e8a7977d09f("net: hns3: add handling for big TX fragment") Signed-off-by: Yunsheng Lin Signed-off-by: Huzhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index c38f3bbe7d97cd..12f102647d8f84 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1118,12 +1118,12 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, return -ENOMEM; } + desc_cb->priv = priv; desc_cb->length = size; + desc_cb->dma = dma; + desc_cb->type = type; if (likely(size <= HNS3_MAX_BD_SIZE)) { - desc_cb->priv = priv; - desc_cb->dma = dma; - desc_cb->type = type; desc->addr = cpu_to_le64(dma); desc->tx.send_size = cpu_to_le16(size); desc->tx.bdtp_fe_sc_vld_ra_ri = @@ -1140,13 +1140,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, /* When frag size is bigger than hardware limit, split this frag */ for (k = 0; k < frag_buf_num; k++) { - /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ - desc_cb->priv = priv; - desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; - desc_cb->type = ((type == DESC_TYPE_FRAGLIST_SKB || - type == DESC_TYPE_SKB) && !k) ? - type : DESC_TYPE_PAGE; - /* now, fill the descriptor */ desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ? @@ -1158,7 +1151,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, /* move ring pointer to next */ ring_ptr_move_fw(ring, next_to_use); - desc_cb = &ring->desc_cb[ring->next_to_use]; desc = &ring->desc[ring->next_to_use]; } From 48ae74c9d89f827b39b5c07a1f02fc13637a3cd6 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 21 Jul 2020 19:03:52 +0800 Subject: [PATCH 093/131] net: hns3: fix for not calculating TX BD send size correctly With GRO and fraglist support, the SKB can be aggregated to a total size of 65535, and when that SKB is forwarded through a bridge, the size of the SKB may be pushed to exceed the size of 65535 when br_dev_queue_push_xmit() is called. The max send size of BD supported by the HW is 65535, when a SKB with a headlen of over 65535 is sent to the driver, the driver needs to use multi BD to send the linear data, and the send size of the last BD is calculated incorrectly by the driver who is using '&' operation, which causes a TX error. Use '%' operation to fix this problem. Fixes: 3fe13ed95dd3 ("net: hns3: avoid mult + div op in critical data path") Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 12f102647d8f84..a8a5112a578c74 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1135,7 +1135,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, } frag_buf_num = hns3_tx_bd_count(size); - sizeoflast = size & HNS3_TX_LAST_SIZE_M; + sizeoflast = size % HNS3_MAX_BD_SIZE; sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE; /* When frag size is bigger than hardware limit, split this frag */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 66cd4395f78146..a8776620acbc6c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -165,8 +165,6 @@ enum hns3_nic_state { #define HNS3_TXD_MSS_S 0 #define HNS3_TXD_MSS_M (0x3fff << HNS3_TXD_MSS_S) -#define HNS3_TX_LAST_SIZE_M 0xffff - #define HNS3_VECTOR_TX_IRQ BIT_ULL(0) #define HNS3_VECTOR_RX_IRQ BIT_ULL(1) From 8ceca59fb3ed48a693171bd571c4fcbd555b7f1f Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 21 Jul 2020 19:03:53 +0800 Subject: [PATCH 094/131] net: hns3: fix error handling for desc filling The content of the TX desc is automatically cleared by the HW when the HW has sent out the packet to the wire. When desc filling fails in hns3_nic_net_xmit(), it will call hns3_clear_desc() to do the error handling, which miss zeroing of the TX desc and the checking if a unmapping is needed. So add the zeroing and checking in hns3_clear_desc() to avoid the above problem. Also add DESC_TYPE_UNKNOWN to indicate the info in desc_cb is not valid, because hns3_nic_reclaim_desc() may treat the desc_cb->type of zero as packet and add to the sent pkt statistics accordingly. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index d041cac9a487a8..088550db2de78e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -77,6 +77,7 @@ ((ring)->p = ((ring)->p - 1 + (ring)->desc_num) % (ring)->desc_num) enum hns_desc_type { + DESC_TYPE_UNKNOWN, DESC_TYPE_SKB, DESC_TYPE_FRAGLIST_SKB, DESC_TYPE_PAGE, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a8a5112a578c74..33c481d11116a7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1338,6 +1338,10 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) unsigned int i; for (i = 0; i < ring->desc_num; i++) { + struct hns3_desc *desc = &ring->desc[ring->next_to_use]; + + memset(desc, 0, sizeof(*desc)); + /* check if this is where we started */ if (ring->next_to_use == next_to_use_orig) break; @@ -1345,6 +1349,9 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) /* rollback one */ ring_ptr_move_bw(ring, next_to_use); + if (!ring->desc_cb[ring->next_to_use].dma) + continue; + /* unmap the descriptor dma address */ if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB || ring->desc_cb[ring->next_to_use].type == @@ -1361,6 +1368,7 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) ring->desc_cb[ring->next_to_use].length = 0; ring->desc_cb[ring->next_to_use].dma = 0; + ring->desc_cb[ring->next_to_use].type = DESC_TYPE_UNKNOWN; } } From fac24df7b9a6d9363abdff0e351ade041dd16daa Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 21 Jul 2020 19:03:54 +0800 Subject: [PATCH 095/131] net: hns3: fix return value error when query MAC link status fail Currently, PF queries the MAC link status per second by calling function hclge_get_mac_link_status(). It return the error code when failed to send cmdq command to firmware. It's incorrect, because this return value is used as the MAC link status, which 0 means link down, and none-zero means link up. So fixes it. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Jian Shen Signed-off-by: Huazhong tan Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 49 +++++++++---------- .../hisilicon/hns3/hns3pf/hclge_main.h | 3 ++ 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d6bfdc6520dff7..bb4a6327035d15 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2673,11 +2673,10 @@ void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) delay_time); } -static int hclge_get_mac_link_status(struct hclge_dev *hdev) +static int hclge_get_mac_link_status(struct hclge_dev *hdev, int *link_status) { struct hclge_link_status_cmd *req; struct hclge_desc desc; - int link_status; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_LINK_STATUS, true); @@ -2689,33 +2688,25 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev) } req = (struct hclge_link_status_cmd *)desc.data; - link_status = req->status & HCLGE_LINK_STATUS_UP_M; + *link_status = (req->status & HCLGE_LINK_STATUS_UP_M) > 0 ? + HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN; - return !!link_status; + return 0; } -static int hclge_get_mac_phy_link(struct hclge_dev *hdev) +static int hclge_get_mac_phy_link(struct hclge_dev *hdev, int *link_status) { - unsigned int mac_state; - int link_stat; + struct phy_device *phydev = hdev->hw.mac.phydev; + + *link_status = HCLGE_LINK_STATUS_DOWN; if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) return 0; - mac_state = hclge_get_mac_link_status(hdev); - - if (hdev->hw.mac.phydev) { - if (hdev->hw.mac.phydev->state == PHY_RUNNING) - link_stat = mac_state & - hdev->hw.mac.phydev->link; - else - link_stat = 0; - - } else { - link_stat = mac_state; - } + if (phydev && (phydev->state != PHY_RUNNING || !phydev->link)) + return 0; - return !!link_stat; + return hclge_get_mac_link_status(hdev, link_status); } static void hclge_update_link_status(struct hclge_dev *hdev) @@ -2725,6 +2716,7 @@ static void hclge_update_link_status(struct hclge_dev *hdev) struct hnae3_handle *rhandle; struct hnae3_handle *handle; int state; + int ret; int i; if (!client) @@ -2733,7 +2725,12 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (test_and_set_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state)) return; - state = hclge_get_mac_phy_link(hdev); + ret = hclge_get_mac_phy_link(hdev, &state); + if (ret) { + clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); + return; + } + if (state != hdev->hw.mac.link) { for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { handle = &hdev->vport[i].nic; @@ -6524,14 +6521,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret) { #define HCLGE_MAC_LINK_STATUS_NUM 100 + int link_status; int i = 0; int ret; do { - ret = hclge_get_mac_link_status(hdev); - if (ret < 0) + ret = hclge_get_mac_link_status(hdev, &link_status); + if (ret) return ret; - else if (ret == link_ret) + if (link_status == link_ret) return 0; msleep(HCLGE_LINK_STATUS_MS); @@ -6542,9 +6540,6 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret) static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en, bool is_phy) { -#define HCLGE_LINK_STATUS_DOWN 0 -#define HCLGE_LINK_STATUS_UP 1 - int link_ret; link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 46e6e0fef3ba5a..9bbdd4557c271c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -317,6 +317,9 @@ enum hclge_link_fail_code { HCLGE_LF_XSFP_ABSENT, }; +#define HCLGE_LINK_STATUS_DOWN 0 +#define HCLGE_LINK_STATUS_UP 1 + #define HCLGE_PG_NUM 4 #define HCLGE_SCH_MODE_SP 0 #define HCLGE_SCH_MODE_DWRR 1 From 3506b2f42dff66ea6814c3dfa1988bafb79e6f88 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Tue, 21 Jul 2020 13:07:39 +0200 Subject: [PATCH 096/131] net: dsa: microchip: call phy_remove_link_mode during probe When doing "ip link set dev ... up" for a ksz9477 backed link, ksz9477_phy_setup is called and it calls phy_remove_link_mode to remove 1000baseT HDX. During phy_remove_link_mode, phy_advertise_supported is called. Doing so reverts any previous change to advertised link modes e.g. using a udevd .link file. phy_remove_link_mode is not meant to be used while opening a link and should be called during phy probe when the link is not yet available to userspace. Therefore move the phy_remove_link_mode calls into ksz9477_switch_register. It indirectly calls dsa_register_switch, which creates the relevant struct phy_devices and we update the link modes right after that. At that time dev->features is already initialized by ksz9477_switch_detect. Remove phy_setup from ksz_dev_ops as no users remain. Link: https://lore.kernel.org/netdev/20200715192722.GD1256692@lunn.ch/ Fixes: 42fc6a4c613019 ("net: dsa: microchip: prepare PHY for proper advertisement") Signed-off-by: Helmut Grohne Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477.c | 42 ++++++++++++++------------ drivers/net/dsa/microchip/ksz_common.c | 2 -- drivers/net/dsa/microchip/ksz_common.h | 2 -- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 8d15c301602461..4a9239b2c2e4ac 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -974,23 +974,6 @@ static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port, PORT_MIRROR_SNIFFER, false); } -static void ksz9477_phy_setup(struct ksz_device *dev, int port, - struct phy_device *phy) -{ - /* Only apply to port with PHY. */ - if (port >= dev->phy_port_cnt) - return; - - /* The MAC actually cannot run in 1000 half-duplex mode. */ - phy_remove_link_mode(phy, - ETHTOOL_LINK_MODE_1000baseT_Half_BIT); - - /* PHY does not support gigabit. */ - if (!(dev->features & GBIT_SUPPORT)) - phy_remove_link_mode(phy, - ETHTOOL_LINK_MODE_1000baseT_Full_BIT); -} - static bool ksz9477_get_gbit(struct ksz_device *dev, u8 data) { bool gbit; @@ -1603,7 +1586,6 @@ static const struct ksz_dev_ops ksz9477_dev_ops = { .get_port_addr = ksz9477_get_port_addr, .cfg_port_member = ksz9477_cfg_port_member, .flush_dyn_mac_table = ksz9477_flush_dyn_mac_table, - .phy_setup = ksz9477_phy_setup, .port_setup = ksz9477_port_setup, .r_mib_cnt = ksz9477_r_mib_cnt, .r_mib_pkt = ksz9477_r_mib_pkt, @@ -1617,7 +1599,29 @@ static const struct ksz_dev_ops ksz9477_dev_ops = { int ksz9477_switch_register(struct ksz_device *dev) { - return ksz_switch_register(dev, &ksz9477_dev_ops); + int ret, i; + struct phy_device *phydev; + + ret = ksz_switch_register(dev, &ksz9477_dev_ops); + if (ret) + return ret; + + for (i = 0; i < dev->phy_port_cnt; ++i) { + if (!dsa_is_user_port(dev->ds, i)) + continue; + + phydev = dsa_to_port(dev->ds, i)->slave->phydev; + + /* The MAC actually cannot run in 1000 half-duplex mode. */ + phy_remove_link_mode(phydev, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + + /* PHY does not support gigabit. */ + if (!(dev->features & GBIT_SUPPORT)) + phy_remove_link_mode(phydev, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT); + } + return ret; } EXPORT_SYMBOL(ksz9477_switch_register); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index fd1d6676ae4fda..7b6c0dce753600 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -358,8 +358,6 @@ int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) /* setup slave port */ dev->dev_ops->port_setup(dev, port, false); - if (dev->dev_ops->phy_setup) - dev->dev_ops->phy_setup(dev, port, phy); /* port_stp_state_set() will be called after to enable the port so * there is no need to do anything. diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index f2c9bb68fd3306..7d11dd32ec0d1a 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -119,8 +119,6 @@ struct ksz_dev_ops { u32 (*get_port_addr)(int port, int offset); void (*cfg_port_member)(struct ksz_device *dev, int port, u8 member); void (*flush_dyn_mac_table)(struct ksz_device *dev, int port); - void (*phy_setup)(struct ksz_device *dev, int port, - struct phy_device *phy); void (*port_cleanup)(struct ksz_device *dev, int port); void (*port_setup)(struct ksz_device *dev, int port, bool cpu_port); void (*r_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 *val); From 2c9d8e01f0c6017317eee7638496173d4a64e6bc Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 21 Jul 2020 14:51:50 +0000 Subject: [PATCH 097/131] netdevsim: fix unbalaced locking in nsim_create() In the nsim_create(), rtnl_lock() is called before nsim_bpf_init(). If nsim_bpf_init() is failed, rtnl_unlock() should be called, but it isn't called. So, unbalanced locking would occur. Fixes: e05b2d141fef ("netdevsim: move netdev creation/destruction to dev probe") Signed-off-by: Taehee Yoo Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 2908e0a0d6e19a..23950e7a0f81ef 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -302,7 +302,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) rtnl_lock(); err = nsim_bpf_init(ns); if (err) - goto err_free_netdev; + goto err_rtnl_unlock; nsim_ipsec_init(ns); @@ -316,8 +316,8 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) err_ipsec_teardown: nsim_ipsec_teardown(ns); nsim_bpf_uninit(ns); +err_rtnl_unlock: rtnl_unlock(); -err_free_netdev: free_netdev(dev); return ERR_PTR(err); } From 1ea999039fe7c7953da2fbb7ca7c3ef00064d328 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 21 Jul 2020 17:41:42 +0300 Subject: [PATCH 098/131] qed: suppress "don't support RoCE & iWARP" flooding on HW init Change the verbosity of the "don't support RoCE & iWARP simultaneously" warning to debug level to stop flooding on driver/hardware initialization: [ 4.783230] qede 01:00.00: Storm FW 8.37.7.0, Management FW 8.52.9.0 [MBI 15.10.6] [eth0] [ 4.810020] [qed_rdma_set_pf_params:2076()]Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only [ 4.861186] qede 01:00.01: Storm FW 8.37.7.0, Management FW 8.52.9.0 [MBI 15.10.6] [eth1] [ 4.893311] [qed_rdma_set_pf_params:2076()]Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only [ 5.181713] qede a1:00.00: Storm FW 8.37.7.0, Management FW 8.52.9.0 [MBI 15.10.6] [eth2] [ 5.224740] [qed_rdma_set_pf_params:2076()]Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only [ 5.276449] qede a1:00.01: Storm FW 8.37.7.0, Management FW 8.52.9.0 [MBI 15.10.6] [eth3] [ 5.318671] [qed_rdma_set_pf_params:2076()]Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only [ 5.369548] qede a1:00.02: Storm FW 8.37.7.0, Management FW 8.52.9.0 [MBI 15.10.6] [eth4] [ 5.411645] [qed_rdma_set_pf_params:2076()]Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only Fixes: e0a8f9de16fc ("qed: Add iWARP enablement support") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 08ba9d54ab63a5..d13ec88313c386 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2008,8 +2008,8 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, enum protocol_type proto; if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) { - DP_NOTICE(p_hwfn, - "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n"); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n"); p_hwfn->hw_info.personality = QED_PCI_ETH_ROCE; } From eb61c2d69903e977ffa2b80b1da9d1f758cf228d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 21 Jul 2020 17:41:43 +0300 Subject: [PATCH 099/131] qed: suppress false-positives interrupt error messages on HW init It was found that qed_pglueb_rbc_attn_handler() can produce a lot of false-positive error detections on driver load/reload (especially after crashes/recoveries) and spam the kernel log: [ 4.958275] [qed_pglueb_rbc_attn_handler:324()]ICPL error - 00d00ff0 [ 2079.146764] [qed_pglueb_rbc_attn_handler:324()]ICPL error - 00d80ff0 [ 2116.374631] [qed_pglueb_rbc_attn_handler:324()]ICPL error - 00d80ff0 [ 2135.250564] [qed_pglueb_rbc_attn_handler:324()]ICPL error - 00d80ff0 [...] Reduce the logging level of two false-positive prone error messages from notice to verbose on initialization (only) to not mix it with real error attentions while debugging. Fixes: 666db4862f2d ("qed: Revise load sequence to avoid PCI errors") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_int.c | 50 +++++++++++++---------- drivers/net/ethernet/qlogic/qed/qed_int.h | 4 +- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 9c26fde663b385..dbdac983ccde55 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3102,7 +3102,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) } /* Log and clear previous pglue_b errors if such exist */ - qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_main_ptt); + qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_main_ptt, true); /* Enable the PF's internal FID_enable in the PXP */ rc = qed_pglueb_set_pfid_enable(p_hwfn, p_hwfn->p_main_ptt, diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 7e13a9d9b89c69..5eec1fc6229d94 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -257,9 +257,10 @@ static int qed_grc_attn_cb(struct qed_hwfn *p_hwfn) #define PGLUE_ATTENTION_ZLR_VALID (1 << 25) #define PGLUE_ATTENTION_ILT_VALID (1 << 23) -int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + bool hw_init) { + char msg[256]; u32 tmp; tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS2); @@ -273,22 +274,23 @@ int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, details = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS); - DP_NOTICE(p_hwfn, - "Illegal write by chip to [%08x:%08x] blocked.\n" - "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n" - "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n", - addr_hi, addr_lo, details, - (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID), - (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID), - GET_FIELD(details, - PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0, - tmp, - GET_FIELD(tmp, - PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0, - GET_FIELD(tmp, - PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0, - GET_FIELD(tmp, - PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0); + snprintf(msg, sizeof(msg), + "Illegal write by chip to [%08x:%08x] blocked.\n" + "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n" + "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]", + addr_hi, addr_lo, details, + (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID), + (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID), + !!GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VF_VALID), + tmp, + !!GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_WAS_ERR), + !!GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_BME), + !!GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_FID_EN)); + + if (hw_init) + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "%s\n", msg); + else + DP_NOTICE(p_hwfn, "%s\n", msg); } tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_DETAILS2); @@ -321,8 +323,14 @@ int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, } tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL); - if (tmp & PGLUE_ATTENTION_ICPL_VALID) - DP_NOTICE(p_hwfn, "ICPL error - %08x\n", tmp); + if (tmp & PGLUE_ATTENTION_ICPL_VALID) { + snprintf(msg, sizeof(msg), "ICPL error - %08x", tmp); + + if (hw_init) + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "%s\n", msg); + else + DP_NOTICE(p_hwfn, "%s\n", msg); + } tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS); if (tmp & PGLUE_ATTENTION_ZLR_VALID) { @@ -361,7 +369,7 @@ int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn) { - return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt); + return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt, false); } static int qed_fw_assertion(struct qed_hwfn *p_hwfn) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index e09db338636700..110169e9012199 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -442,7 +442,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, #define QED_MAPPING_MEMORY_SIZE(dev) (NUM_OF_SBS(dev)) -int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt); +int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + bool hw_init); #endif From b346c0c85892cb8c53e8715734f71ba5bbec3387 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Tue, 21 Jul 2020 18:17:10 +0200 Subject: [PATCH 100/131] selftest: txtimestamp: fix net ns entry logic According to 'man 8 ip-netns', if `ip netns identify` returns an empty string, there's no net namespace associated with current PID: fix the net ns entrance logic. Signed-off-by: Paolo Pisati Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/txtimestamp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index eea6f5193693f6..31637769f59f60 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -75,7 +75,7 @@ main() { fi } -if [[ "$(ip netns identify)" == "root" ]]; then +if [[ -z "$(ip netns identify)" ]]; then ./in_netns.sh $0 $@ else main $@ From 8210e344ccb798c672ab237b1a4f241bda08909b Mon Sep 17 00:00:00 2001 From: guodeqing Date: Thu, 16 Jul 2020 16:12:08 +0800 Subject: [PATCH 101/131] ipvs: fix the connection sync failed in some cases The sync_thread_backup only checks sk_receive_queue is empty or not, there is a situation which cannot sync the connection entries when sk_receive_queue is empty and sk_rmem_alloc is larger than sk_rcvbuf, the sync packets are dropped in __udp_enqueue_schedule_skb, this is because the packets in reader_queue is not read, so the rmem is not reclaimed. Here I add the check of whether the reader_queue of the udp sock is empty or not to solve this problem. Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") Reported-by: zhouxudong Signed-off-by: guodeqing Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_sync.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 605e0f68f8bd30..2b8abbfe018cf4 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1717,6 +1717,8 @@ static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = tinfo->ipvs; + struct sock *sk = tinfo->sock->sk; + struct udp_sock *up = udp_sk(sk); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " @@ -1724,12 +1726,14 @@ static int sync_thread_backup(void *data) ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id); while (!kthread_should_stop()) { - wait_event_interruptible(*sk_sleep(tinfo->sock->sk), - !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) - || kthread_should_stop()); + wait_event_interruptible(*sk_sleep(sk), + !skb_queue_empty_lockless(&sk->sk_receive_queue) || + !skb_queue_empty_lockless(&up->reader_queue) || + kthread_should_stop()); /* do we have data now? */ - while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { + while (!skb_queue_empty_lockless(&sk->sk_receive_queue) || + !skb_queue_empty_lockless(&up->reader_queue)) { len = ip_vs_receive(tinfo->sock, tinfo->buf, ipvs->bcfg.sync_maxlen); if (len <= 0) { From 8fdcabeac39824fe67480fd9508d80161c541854 Mon Sep 17 00:00:00 2001 From: Xie He Date: Thu, 16 Jul 2020 16:44:33 -0700 Subject: [PATCH 102/131] drivers/net/wan/x25_asy: Fix to make it work This driver is not working because of problems of its receiving code. This patch fixes it to make it work. When the driver receives an LAPB frame, it should first pass the frame to the LAPB module to process. After processing, the LAPB module passes the data (the packet) back to the driver, the driver should then add a one-byte pseudo header and pass the data to upper layers. The changes to the "x25_asy_bump" function and the "x25_asy_data_indication" function are to correctly implement this procedure. Also, the "x25_asy_unesc" function ignores any frame that is shorter than 3 bytes. However the shortest frames are 2-byte long. So we need to change it to allow 2-byte frames to pass. Cc: Eric Dumazet Cc: Martin Schiller Signed-off-by: Xie He Reviewed-by: Martin Schiller Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 69773d228ec175..84640a0c13f359 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -183,7 +183,7 @@ static inline void x25_asy_unlock(struct x25_asy *sl) netif_wake_queue(sl->dev); } -/* Send one completely decapsulated IP datagram to the IP layer. */ +/* Send an LAPB frame to the LAPB module to process. */ static void x25_asy_bump(struct x25_asy *sl) { @@ -195,13 +195,12 @@ static void x25_asy_bump(struct x25_asy *sl) count = sl->rcount; dev->stats.rx_bytes += count; - skb = dev_alloc_skb(count+1); + skb = dev_alloc_skb(count); if (skb == NULL) { netdev_warn(sl->dev, "memory squeeze, dropping packet\n"); dev->stats.rx_dropped++; return; } - skb_push(skb, 1); /* LAPB internal control */ skb_put_data(skb, sl->rbuff, count); skb->protocol = x25_type_trans(skb, sl->dev); err = lapb_data_received(skb->dev, skb); @@ -209,7 +208,6 @@ static void x25_asy_bump(struct x25_asy *sl) kfree_skb(skb); printk(KERN_DEBUG "x25_asy: data received err - %d\n", err); } else { - netif_rx(skb); dev->stats.rx_packets++; } } @@ -356,12 +354,21 @@ static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, */ /* - * Called when I frame data arrives. We did the work above - throw it - * at the net layer. + * Called when I frame data arrive. We add a pseudo header for upper + * layers and pass it to upper layers. */ static int x25_asy_data_indication(struct net_device *dev, struct sk_buff *skb) { + if (skb_cow(skb, 1)) { + kfree_skb(skb); + return NET_RX_DROP; + } + skb_push(skb, 1); + skb->data[0] = X25_IFACE_DATA; + + skb->protocol = x25_type_trans(skb, dev); + return netif_rx(skb); } @@ -657,7 +664,7 @@ static void x25_asy_unesc(struct x25_asy *sl, unsigned char s) switch (s) { case X25_END: if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && - sl->rcount > 2) + sl->rcount >= 2) x25_asy_bump(sl); clear_bit(SLF_ESCAPE, &sl->flags); sl->rcount = 0; From 1264d7fa3a64d8bea7aebb77253f917947ffda25 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 17 Jul 2020 10:50:49 +0800 Subject: [PATCH 103/131] net: ethernet: ave: Fix error returns in ave_init When regmap_update_bits failed in ave_init(), calls of the functions reset_control_assert() and clk_disable_unprepare() were missed. Add goto out_reset_assert to do this. Fixes: 57878f2f4697 ("net: ethernet: ave: add support for phy-mode setting of system controller") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Kunihiko Hayashi Signed-off-by: David S. Miller --- drivers/net/ethernet/socionext/sni_ave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index f2638446b62e69..81b554dd7221bc 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1191,7 +1191,7 @@ static int ave_init(struct net_device *ndev) ret = regmap_update_bits(priv->regmap, SG_ETPINMODE, priv->pinmode_mask, priv->pinmode_val); if (ret) - return ret; + goto out_reset_assert; ave_global_reset(ndev); From 2c4dc31486034fa8929a571e7a59677e92b5d6c5 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Fri, 17 Jul 2020 15:19:32 +0300 Subject: [PATCH 104/131] net: ethernet: ti: add NETIF_F_HW_TC hw feature flag for taprio offload Currently drive supports taprio offload which is a tc feature offloaded to cpsw hardware. So driver has to set the hw feature flag, NETIF_F_HW_TC in the net device to be compliant. This patch adds the flag. Fixes: 8127224c2708 ("ethernet: ti: am65-cpsw-qos: add TAPRIO offload support") Signed-off-by: Murali Karicheri Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 1492648247d96c..6d778bc3d012f3 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1850,7 +1850,8 @@ static int am65_cpsw_nuss_init_ndev_2g(struct am65_cpsw_common *common) port->ndev->max_mtu = AM65_CPSW_MAX_PACKET_SIZE; port->ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | + NETIF_F_HW_TC; port->ndev->features = port->ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; port->ndev->vlan_features |= NETIF_F_SG; From de2b41be8fcccb2f5b6c480d35df590476344201 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 21 Jul 2020 11:34:48 +0200 Subject: [PATCH 105/131] x86, vmlinux.lds: Page-align end of ..page_aligned sections On x86-32 the idt_table with 256 entries needs only 2048 bytes. It is page-aligned, but the end of the .bss..page_aligned section is not guaranteed to be page-aligned. As a result, objects from other .bss sections may end up on the same 4k page as the idt_table, and will accidentially get mapped read-only during boot, causing unexpected page-faults when the kernel writes to them. This could be worked around by making the objects in the page aligned sections page sized, but that's wrong. Explicit sections which store only page aligned objects have an implicit guarantee that the object is alone in the page in which it is placed. That works for all objects except the last one. That's inconsistent. Enforcing page sized objects for these sections would wreckage memory sanitizers, because the object becomes artificially larger than it should be and out of bound access becomes legit. Align the end of the .bss..page_aligned and .data..page_aligned section on page-size so all objects places in these sections are guaranteed to have their own page. [ tglx: Amended changelog ] Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200721093448.10417-1-joro@8bytes.org --- arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3bfc8dd8a43dbf..9a03e5b23135af 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -358,6 +358,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) + . = ALIGN(PAGE_SIZE); *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db600ef218d7d2..052e0f05a98417 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -341,7 +341,8 @@ #define PAGE_ALIGNED_DATA(page_align) \ . = ALIGN(page_align); \ - *(.data..page_aligned) + *(.data..page_aligned) \ + . = ALIGN(page_align); #define READ_MOSTLY_DATA(align) \ . = ALIGN(align); \ @@ -737,7 +738,9 @@ . = ALIGN(bss_align); \ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { \ BSS_FIRST_SECTIONS \ + . = ALIGN(PAGE_SIZE); \ *(.bss..page_aligned) \ + . = ALIGN(PAGE_SIZE); \ *(.dynbss) \ *(BSS_MAIN) \ *(COMMON) \ From d136122f58458479fd8926020ba2937de61d7f65 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Jul 2020 17:20:21 +0200 Subject: [PATCH 106/131] sched: Fix race against ptrace_freeze_trace() There is apparently one site that violates the rule that only current and ttwu() will modify task->state, namely ptrace_{,un}freeze_traced() will change task->state for a remote task. Oleg explains: "TASK_TRACED/TASK_STOPPED was always protected by siglock. In particular, ttwu(__TASK_TRACED) must be always called with siglock held. That is why ptrace_freeze_traced() assumes it can safely do s/TASK_TRACED/__TASK_TRACED/ under spin_lock(siglock)." This breaks the ordering scheme introduced by commit: dbfb089d360b ("sched: Fix loadavg accounting race") Specifically, the reload not matching no longer implies we don't have to block. Simply things by noting that what we need is a LOAD->STORE ordering and this can be provided by a control dependency. So replace: prev_state = prev->state; raw_spin_lock(&rq->lock); smp_mb__after_spinlock(); /* SMP-MB */ if (... && prev_state && prev_state == prev->state) deactivate_task(); with: prev_state = prev->state; if (... && prev_state) /* CTRL-DEP */ deactivate_task(); Since that already implies the 'prev->state' load must be complete before allowing the 'prev->on_rq = 0' store to become visible. Fixes: dbfb089d360b ("sched: Fix loadavg accounting race") Reported-by: Jiri Slaby Signed-off-by: Peter Zijlstra (Intel) Acked-by: Oleg Nesterov Tested-by: Paul Gortmaker Tested-by: Christian Brauner --- kernel/sched/core.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e15543cb848128..5dece9b34e25ec 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4119,9 +4119,6 @@ static void __sched notrace __schedule(bool preempt) local_irq_disable(); rcu_note_context_switch(preempt); - /* See deactivate_task() below. */ - prev_state = prev->state; - /* * Make sure that signal_pending_state()->signal_pending() below * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) @@ -4145,11 +4142,16 @@ static void __sched notrace __schedule(bool preempt) update_rq_clock(rq); switch_count = &prev->nivcsw; + /* - * We must re-load prev->state in case ttwu_remote() changed it - * before we acquired rq->lock. + * We must load prev->state once (task_struct::state is volatile), such + * that: + * + * - we form a control dependency vs deactivate_task() below. + * - ptrace_{,un}freeze_traced() can change ->state underneath us. */ - if (!preempt && prev_state && prev_state == prev->state) { + prev_state = prev->state; + if (!preempt && prev_state) { if (signal_pending_state(prev_state, prev)) { prev->state = TASK_RUNNING; } else { @@ -4163,10 +4165,12 @@ static void __sched notrace __schedule(bool preempt) /* * __schedule() ttwu() - * prev_state = prev->state; if (READ_ONCE(p->on_rq) && ...) - * LOCK rq->lock goto out; - * smp_mb__after_spinlock(); smp_acquire__after_ctrl_dep(); - * p->on_rq = 0; p->state = TASK_WAKING; + * prev_state = prev->state; if (p->on_rq && ...) + * if (prev_state) goto out; + * p->on_rq = 0; smp_acquire__after_ctrl_dep(); + * p->state = TASK_WAKING + * + * Where __schedule() and ttwu() have matching control dependencies. * * After this, schedule() must not care about p->state any more. */ From 372a8eaa05998cd45b3417d0e0ffd3a70978211a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 17 Jul 2020 09:04:25 -0500 Subject: [PATCH 107/131] x86/unwind/orc: Fix ORC for newly forked tasks The ORC unwinder fails to unwind newly forked tasks which haven't yet run on the CPU. It correctly reads the 'ret_from_fork' instruction pointer from the stack, but it incorrectly interprets that value as a call stack address rather than a "signal" one, so the address gets incorrectly decremented in the call to orc_find(), resulting in bad ORC data. Fix it by forcing 'ret_from_fork' frames to be signal frames. Reported-by: Wang ShaoBo Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Wang ShaoBo Link: https://lkml.kernel.org/r/f91a8778dde8aae7f71884b5df2b16d552040441.1594994374.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 7f969b2d240fd9..ec88bbe08a3281 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -440,8 +440,11 @@ bool unwind_next_frame(struct unwind_state *state) /* * Find the orc_entry associated with the text address. * - * Decrement call return addresses by one so they work for sibling - * calls and calls to noreturn functions. + * For a call frame (as opposed to a signal frame), state->ip points to + * the instruction after the call. That instruction's stack layout + * could be different from the call instruction's layout, for example + * if the call was to a noreturn function. So get the ORC data for the + * call instruction itself. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); if (!orc) { @@ -662,6 +665,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = task->thread.sp; state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); + state->signal = (void *)state->ip == ret_from_fork; } if (get_stack_info((unsigned long *)state->sp, state->task, From 039a7a30ec102ec866d382a66f87f6f7654f8140 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 17 Jul 2020 09:04:26 -0500 Subject: [PATCH 108/131] x86/stacktrace: Fix reliable check for empty user task stacks If a user task's stack is empty, or if it only has user regs, ORC reports it as a reliable empty stack. But arch_stack_walk_reliable() incorrectly treats it as unreliable. That happens because the only success path for user tasks is inside the loop, which only iterates on non-empty stacks. Generally, a user task must end in a user regs frame, but an empty stack is an exception to that rule. Thanks to commit 71c95825289f ("x86/unwind/orc: Fix error handling in __unwind_start()"), unwind_start() now sets state->error appropriately. So now for both ORC and FP unwinders, unwind_done() and !unwind_error() always means the end of the stack was successfully reached. So the success path for kthreads is no longer needed -- it can also be used for empty user tasks. Reported-by: Wang ShaoBo Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Wang ShaoBo Link: https://lkml.kernel.org/r/f136a4e5f019219cbc4f4da33b30c2f44fa65b84.1594994374.git.jpoimboe@redhat.com --- arch/x86/kernel/stacktrace.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6ad43fc44556e2..2fd698e28e4d5d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -58,7 +58,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, * or a page fault), which can make frame pointers * unreliable. */ - if (IS_ENABLED(CONFIG_FRAME_POINTER)) return -EINVAL; } @@ -81,10 +80,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (unwind_error(&state)) return -EINVAL; - /* Success path for non-user tasks, i.e. kthreads and idle tasks */ - if (!(task->flags & (PF_KTHREAD | PF_IDLE))) - return -EINVAL; - return 0; } From d181d2da0141371bbc360eaea78719203e165e1c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 22 Jul 2020 10:39:54 +0200 Subject: [PATCH 109/131] x86/dumpstack: Dump user space code correctly again H.J. reported that post 5.7 a segfault of a user space task does not longer dump the Code bytes when /proc/sys/debug/exception-trace is enabled. It prints 'Code: Bad RIP value.' instead. This was broken by a recent change which made probe_kernel_read() reject non-kernel addresses. Update show_opcodes() so it retrieves user space opcodes via copy_from_user_nmi(). Fixes: 98a23609b103 ("maccess: always use strict semantics for probe_kernel_read") Reported-by: H.J. Lu Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87h7tz306w.fsf@nanos.tec.linutronix.de --- arch/x86/kernel/dumpstack.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b037cfa7c0c52f..7401cc12c3ccf4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -71,6 +71,22 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, + unsigned int nbytes) +{ + if (!user_mode(regs)) + return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); + + /* + * Make sure userspace isn't trying to trick us into dumping kernel + * memory by pointing the userspace instruction pointer at it. + */ + if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX)) + return -EINVAL; + + return copy_from_user_nmi(buf, (void __user *)src, nbytes); +} + /* * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus: * @@ -97,17 +113,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE) u8 opcodes[OPCODE_BUFSIZE]; unsigned long prologue = regs->ip - PROLOGUE_SIZE; - bool bad_ip; - - /* - * Make sure userspace isn't trying to trick us into dumping kernel - * memory by pointing the userspace instruction pointer at it. - */ - bad_ip = user_mode(regs) && - __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, - OPCODE_BUFSIZE)) { + if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { printk("%sCode: Bad RIP value.\n", loglvl); } else { printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" From 26cb7085c8984e5b71d65c374a135134ed8cabb3 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Wed, 22 Jul 2020 17:40:12 +0300 Subject: [PATCH 110/131] enetc: Remove the mdio bus on PF probe bailout For ENETC ports that register an external MDIO bus, the bus doesn't get removed on the error bailout path of enetc_pf_probe(). This issue became much more visible after recent: commit 07095c025ac2 ("net: enetc: Use DT protocol information to set up the ports") Before this commit, one could make probing fail on the error path only by having register_netdev() fail, which is unlikely. But after this commit, because it moved the enetc_of_phy_get() call up in the probing sequence, now we can trigger an mdiobus_free() bug just by forcing enetc_alloc_msix() to return error, i.e. with the 'pci=nomsi' kernel bootarg (since ENETC relies on MSI support to work), as the calltrace below shows: kernel BUG at /home/eiz/work/enetc/net/drivers/net/phy/mdio_bus.c:648! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [...] Hardware name: LS1028A RDB Board (DT) pstate: 80000005 (Nzcv daif -PAN -UAO BTYPE=--) pc : mdiobus_free+0x50/0x58 lr : devm_mdiobus_free+0x14/0x20 [...] Call trace: mdiobus_free+0x50/0x58 devm_mdiobus_free+0x14/0x20 release_nodes+0x138/0x228 devres_release_all+0x38/0x60 really_probe+0x1c8/0x368 driver_probe_device+0x5c/0xc0 device_driver_attach+0x74/0x80 __driver_attach+0x8c/0xd8 bus_for_each_dev+0x7c/0xd8 driver_attach+0x24/0x30 bus_add_driver+0x154/0x200 driver_register+0x64/0x120 __pci_register_driver+0x44/0x50 enetc_pf_driver_init+0x24/0x30 do_one_initcall+0x60/0x1c0 kernel_init_freeable+0x1fc/0x274 kernel_init+0x14/0x110 ret_from_fork+0x10/0x34 Fixes: ebfcb23d62ab ("enetc: Add ENETC PF level external MDIO support") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 4fac57dbb3c8fd..7a9675bd36e8bd 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -906,6 +906,7 @@ static int enetc_pf_probe(struct pci_dev *pdev, return 0; err_reg_netdev: + enetc_mdio_remove(pf); enetc_of_put_phy(priv); enetc_free_msix(priv); err_alloc_msix: From 2f2a7ffad5c6cbf3d438e813cfdc88230e185ba6 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 22 Jul 2020 11:19:01 -0400 Subject: [PATCH 111/131] AX.25: Fix out-of-bounds read in ax25_connect() Checks on `addr_len` and `fsa->fsa_ax25.sax25_ndigis` are insufficient. ax25_connect() can go out of bounds when `fsa->fsa_ax25.sax25_ndigis` equals to 7 or 8. Fix it. This issue has been reported as a KMSAN uninit-value bug, because in such a case, ax25_connect() reaches into the uninitialized portion of the `struct sockaddr_storage` statically allocated in __sys_connect(). It is safe to remove `fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS` because `addr_len` is guaranteed to be less than or equal to `sizeof(struct full_sockaddr_ax25)`. Reported-by: syzbot+c82752228ed975b0a623@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=55ef9d629f3b3d7d70b69558015b63b48d01af66 Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd91cd34f25e03..ef5bf116157a5d 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1187,7 +1187,9 @@ static int __must_check ax25_connect(struct socket *sock, if (addr_len > sizeof(struct sockaddr_ax25) && fsa->fsa_ax25.sax25_ndigis != 0) { /* Valid number of digipeaters ? */ - if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) { + if (fsa->fsa_ax25.sax25_ndigis < 1 || + addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) { err = -EINVAL; goto out_release; } From 8f13399db22f909a35735bf8ae2f932e0c8f0e30 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 22 Jul 2020 23:52:11 +0800 Subject: [PATCH 112/131] sctp: shrink stream outq only when new outcnt < old outcnt It's not necessary to go list_for_each for outq->out_chunk_list when new outcnt >= old outcnt, as no chunk with higher sid than new (outcnt - 1) exists in the outqueue. While at it, also move the list_for_each code in a new function sctp_stream_shrink_out(), which will be used in the next patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/stream.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 67f7e71f912988..4f87693cc03625 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -22,17 +22,11 @@ #include #include -/* Migrates chunks from stream queues to new stream queues if needed, - * but not across associations. Also, removes those chunks to streams - * higher than the new max. - */ -static void sctp_stream_outq_migrate(struct sctp_stream *stream, - struct sctp_stream *new, __u16 outcnt) +static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) { struct sctp_association *asoc; struct sctp_chunk *ch, *temp; struct sctp_outq *outq; - int i; asoc = container_of(stream, struct sctp_association, stream); outq = &asoc->outqueue; @@ -56,6 +50,19 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, sctp_chunk_free(ch); } +} + +/* Migrates chunks from stream queues to new stream queues if needed, + * but not across associations. Also, removes those chunks to streams + * higher than the new max. + */ +static void sctp_stream_outq_migrate(struct sctp_stream *stream, + struct sctp_stream *new, __u16 outcnt) +{ + int i; + + if (stream->outcnt > outcnt) + sctp_stream_shrink_out(stream, outcnt); if (new) { /* Here we actually move the old ext stuff into the new From 3ecdda3e9ad837cf9cb41b6faa11b1af3a5abc0c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 22 Jul 2020 23:52:12 +0800 Subject: [PATCH 113/131] sctp: shrink stream outq when fails to do addstream reconf When adding a stream with stream reconf, the new stream firstly is in CLOSED state but new out chunks can still be enqueued. Then once gets the confirmation from the peer, the state will change to OPEN. However, if the peer denies, it needs to roll back the stream. But when doing that, it only sets the stream outcnt back, and the chunks already in the new stream don't get purged. It caused these chunks can still be dequeued in sctp_outq_dequeue_data(). As its stream is still in CLOSE, the chunk will be enqueued to the head again by sctp_outq_head_data(). This chunk will never be sent out, and the chunks after it can never be dequeued. The assoc will be 'hung' in a dead loop of sending this chunk. To fix it, this patch is to purge these chunks already in the new stream by calling sctp_stream_shrink_out() when failing to do the addstream reconf. Fixes: 11ae76e67a17 ("sctp: implement receiver-side procedures for the Reconf Response Parameter") Reported-by: Ying Xu Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/stream.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 4f87693cc03625..bda2536dd740f7 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -1044,11 +1044,13 @@ struct sctp_chunk *sctp_process_strreset_resp( nums = ntohs(addstrm->number_of_streams); number = stream->outcnt - nums; - if (result == SCTP_STRRESET_PERFORMED) + if (result == SCTP_STRRESET_PERFORMED) { for (i = number; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; - else + } else { + sctp_stream_shrink_out(stream, number); stream->outcnt = number; + } *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, 0, nums, GFP_ATOMIC); From 8885bb0621f01a6c82be60a91e5fc0f6e2f71186 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 22 Jul 2020 12:05:12 -0400 Subject: [PATCH 114/131] AX.25: Prevent out-of-bounds read in ax25_sendmsg() Checks on `addr_len` and `usax->sax25_ndigis` are insufficient. ax25_sendmsg() can go out of bounds when `usax->sax25_ndigis` equals to 7 or 8. Fix it. It is safe to remove `usax->sax25_ndigis > AX25_MAX_DIGIS`, since `addr_len` is guaranteed to be less than or equal to `sizeof(struct full_sockaddr_ax25)` Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ef5bf116157a5d..0862fe49d43497 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1509,7 +1509,8 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; /* Valid number of digipeaters ? */ - if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) { + if (usax->sax25_ndigis < 1 || addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * usax->sax25_ndigis) { err = -EINVAL; goto out; } From 901f3cc1639e5103a8997f6ab243785d292e2074 Mon Sep 17 00:00:00 2001 From: Egor Pomozov Date: Wed, 22 Jul 2020 22:09:58 +0300 Subject: [PATCH 115/131] net: atlantic: fix PTP on AQC10X This patch fixes PTP on AQC10X. PTP support on AQC10X requires FW involvement and FW configures the TPS data arb mode itself. So we must make sure driver doesn't touch TPS data arb mode on AQC10x if PTP is enabled. Otherwise, there are no timestamps even though packets are flowing. Fixes: 2deac71ac492a ("net: atlantic: QoS implementation: min_rate") Signed-off-by: Egor Pomozov Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index d2bc6b289a5458..2125bc20ab6a7a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -373,8 +373,13 @@ static int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self) /* WSP, if min_rate is set for at least one TC. * RR otherwise. + * + * NB! MAC FW sets arb mode itself if PTP is enabled. We shouldn't + * overwrite it here in that case. */ - hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, min_rate_msk ? 1U : 0U); + if (!nic_cfg->is_ptp) + hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, min_rate_msk ? 1U : 0U); + /* Data TC Arbiter takes precedence over Descriptor TC Arbiter, * leave Descriptor TC Arbiter as RR. */ From e6827d1abdc9b061a57d7b7d3019c4e99fabea2f Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Wed, 22 Jul 2020 21:58:39 -0500 Subject: [PATCH 116/131] cxgb4: add missing release on skb in uld_send() In the implementation of uld_send(), the skb is consumed on all execution paths except one. Release skb when returning NET_XMIT_DROP. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 32a45dc51ed796..92eee66cbc8459 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2938,6 +2938,7 @@ static inline int uld_send(struct adapter *adap, struct sk_buff *skb, txq_info = adap->sge.uld_txq_info[tx_uld_type]; if (unlikely(!txq_info)) { WARN_ON(true); + kfree_skb(skb); return NET_XMIT_DROP; } From 17ad73e941b71f3bec7523ea4e9cbc3752461c2d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Jul 2020 17:49:57 +0300 Subject: [PATCH 117/131] AX.25: Prevent integer overflows in connect and sendmsg We recently added some bounds checking in ax25_connect() and ax25_sendmsg() and we so we removed the AX25_MAX_DIGIS checks because they were no longer required. Unfortunately, I believe they are required to prevent integer overflows so I have added them back. Fixes: 8885bb0621f0 ("AX.25: Prevent out-of-bounds read in ax25_sendmsg()") Fixes: 2f2a7ffad5c6 ("AX.25: Fix out-of-bounds read in ax25_connect()") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 0862fe49d43497..dec3f35467c9e1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1188,6 +1188,7 @@ static int __must_check ax25_connect(struct socket *sock, fsa->fsa_ax25.sax25_ndigis != 0) { /* Valid number of digipeaters ? */ if (fsa->fsa_ax25.sax25_ndigis < 1 || + fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS || addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) { err = -EINVAL; @@ -1509,7 +1510,9 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; /* Valid number of digipeaters ? */ - if (usax->sax25_ndigis < 1 || addr_len < sizeof(struct sockaddr_ax25) + + if (usax->sax25_ndigis < 1 || + usax->sax25_ndigis > AX25_MAX_DIGIS || + addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * usax->sax25_ndigis) { err = -EINVAL; goto out; From 76be93fc0702322179bb0ea87295d820ee46ad14 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 23 Jul 2020 12:00:06 -0700 Subject: [PATCH 118/131] tcp: allow at most one TLP probe per flight Previously TLP may send multiple probes of new data in one flight. This happens when the sender is cwnd limited. After the initial TLP containing new data is sent, the sender receives another ACK that acks partial inflight. It may re-arm another TLP timer to send more, if no further ACK returns before the next TLP timeout (PTO) expires. The sender may send in theory a large amount of TLP until send queue is depleted. This only happens if the sender sees such irregular uncommon ACK pattern. But it is generally undesirable behavior during congestion especially. The original TLP design restrict only one TLP probe per inflight as published in "Reducing Web Latency: the Virtue of Gentle Aggression", SIGCOMM 2013. This patch changes TLP to send at most one probe per inflight. Note that if the sender is app-limited, TLP retransmits old data and did not have this issue. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++-- net/ipv4/tcp_input.c | 11 ++++++----- net/ipv4/tcp_output.c | 13 ++++++++----- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9aac824c523cf0..a1bbaa1c1a3a3a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -220,7 +220,9 @@ struct tcp_sock { } rack; u16 advmss; /* Advertised MSS */ u8 compressed_ack; - u8 dup_ack_counter; + u8 dup_ack_counter:2, + tlp_retrans:1, /* TLP is a retransmission */ + unused:5; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ @@ -243,7 +245,7 @@ struct tcp_sock { save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ syn_smc:1; /* SYN includes SMC */ - u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9615e72656d12e..518f04355fbf33 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3488,10 +3488,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } } -/* This routine deals with acks during a TLP episode. - * We mark the end of a TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. - * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. +/* This routine deals with acks during a TLP episode and ends an episode by + * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { @@ -3500,7 +3498,10 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) if (before(ack, tp->tlp_high_seq)) return; - if (flag & FLAG_DSACKING_ACK) { + if (!tp->tlp_retrans) { + /* TLP of new data has been acknowledged */ + tp->tlp_high_seq = 0; + } else if (flag & FLAG_DSACKING_ACK) { /* This DSACK means original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; } else if (after(ack, tp->tlp_high_seq)) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5f5b2f0b0e6065..0bc05d68cd7473 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2624,6 +2624,11 @@ void tcp_send_loss_probe(struct sock *sk) int pcount; int mss = tcp_current_mss(sk); + /* At most one outstanding TLP */ + if (tp->tlp_high_seq) + goto rearm_timer; + + tp->tlp_retrans = 0; skb = tcp_send_head(sk); if (skb && tcp_snd_wnd_test(tp, skb, mss)) { pcount = tp->packets_out; @@ -2641,10 +2646,6 @@ void tcp_send_loss_probe(struct sock *sk) return; } - /* At most one outstanding TLP retransmission. */ - if (tp->tlp_high_seq) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; @@ -2666,10 +2667,12 @@ void tcp_send_loss_probe(struct sock *sk) if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; + tp->tlp_retrans = 1; + +probe_sent: /* Record snd_nxt for loss detection. */ tp->tlp_high_seq = tp->snd_nxt; -probe_sent: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; From 0e6705182d4e1b77248a93470d6d7b3013d59b30 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Jul 2020 14:41:29 -0500 Subject: [PATCH 119/131] Revert "cifs: Fix the target file was deleted when rename failed." This reverts commit 9ffad9263b467efd8f8dc7ae1941a0a655a2bab2. Upon additional testing with older servers, it was found that the original commit introduced a regression when using the old SMB1 dialect and rsyncing over an existing file. The patch will need to be respun to address this, likely including a larger refactoring of the SMB1 and SMB3 rename code paths to make it less confusing and also to address some additional rename error cases that SMB3 may be able to workaround. Signed-off-by: Steve French Reported-by: Patrick Fernie CC: Stable Acked-by: Ronnie Sahlberg Acked-by: Pavel Shilovsky Acked-by: Zhang Xiaoxu --- fs/cifs/inode.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 49c3ea8aa84588..ce95801e9b6644 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2044,7 +2044,6 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, FILE_UNIX_BASIC_INFO *info_buf_target; unsigned int xid; int rc, tmprc; - bool new_target = d_really_is_negative(target_dentry); if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -2121,13 +2120,8 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, */ unlink_target: - /* - * If the target dentry was created during the rename, try - * unlinking it if it's not negative - */ - if (new_target && - d_really_is_positive(target_dentry) && - (rc == -EACCES || rc == -EEXIST)) { + /* Try unlinking the target dentry if it's not negative */ + if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) { if (d_is_dir(target_dentry)) tmprc = cifs_rmdir(target_dir, target_dentry); else From c75d1d5248c0c97996051809ad0e9f154ba5d76e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 Jul 2020 16:31:54 -0700 Subject: [PATCH 120/131] bonding: check return value of register_netdevice() in bond_newlink() Very similar to commit 544f287b8495 ("bonding: check error value of register_netdevice() immediately"), we should immediately check the return value of register_netdevice() before doing anything else. Fixes: 005db31d5f5f ("bonding: set carrier off for devices created through netlink") Reported-and-tested-by: syzbot+bbc3a11c4da63c1b74d6@syzkaller.appspotmail.com Cc: Beniamino Galvani Cc: Taehee Yoo Cc: Jay Vosburgh Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index b43b51646b11a6..f0f9138e967f3a 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -456,11 +456,10 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev, return err; err = register_netdevice(bond_dev); - - netif_carrier_off(bond_dev); if (!err) { struct bonding *bond = netdev_priv(bond_dev); + netif_carrier_off(bond_dev); bond_work_init_all(bond); } From 32818c075c54bb0cae44dd6f7ab00b01c52b8372 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 Jul 2020 18:56:25 -0700 Subject: [PATCH 121/131] geneve: fix an uninitialized value in geneve_changelink() geneve_nl2info() sets 'df' conditionally, so we have to initialize it by copying the value from existing geneve device in geneve_changelink(). Fixes: 56c09de347e4 ("geneve: allow changing DF behavior after creation") Reported-by: syzbot+7ebc2e088af5e4c0c9fa@syzkaller.appspotmail.com Cc: Sabrina Dubroca Signed-off-by: Cong Wang Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 4661ef865807f0..dec52b763d5080 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1615,11 +1615,11 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct geneve_dev *geneve = netdev_priv(dev); + enum ifla_geneve_df df = geneve->df; struct geneve_sock *gs4, *gs6; struct ip_tunnel_info info; bool metadata; bool use_udp6_rx_checksums; - enum ifla_geneve_df df; bool ttl_inherit; int err; From 062d3f95b630113e1156a31f376ad36e25da29a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Jul 2020 21:10:42 +0100 Subject: [PATCH 122/131] sched: Warn if garbage is passed to default_wake_function() Since the default_wake_function() passes its flags onto try_to_wake_up(), warn if those flags collide with internal values. Given that the supplied flags are garbage, no repair can be done but at least alert the user to the damage they are causing. In the belief that these errors should be picked up during testing, the warning is only compiled in under CONFIG_SCHED_DEBUG. Signed-off-by: Chris Wilson Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200723201042.18861-1-chris@chris-wilson.co.uk --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5dece9b34e25ec..2142c67676826d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4485,6 +4485,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, void *key) { + WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); return try_to_wake_up(curr->private, mode, wake_flags); } EXPORT_SYMBOL(default_wake_function); From fe5ed7ab99c656bd2f5b79b49df0e9ebf2cead8a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jul 2020 17:44:20 +0200 Subject: [PATCH 123/131] uprobes: Change handle_swbp() to send SIGTRAP with si_code=SI_KERNEL, to fix GDB regression If a tracee is uprobed and it hits int3 inserted by debugger, handle_swbp() does send_sig(SIGTRAP, current, 0) which means si_code == SI_USER. This used to work when this code was written, but then GDB started to validate si_code and now it simply can't use breakpoints if the tracee has an active uprobe: # cat test.c void unused_func(void) { } int main(void) { return 0; } # gcc -g test.c -o test # perf probe -x ./test -a unused_func # perf record -e probe_test:unused_func gdb ./test -ex run GNU gdb (GDB) 10.0.50.20200714-git ... Program received signal SIGTRAP, Trace/breakpoint trap. 0x00007ffff7ddf909 in dl_main () from /lib64/ld-linux-x86-64.so.2 (gdb) The tracee hits the internal breakpoint inserted by GDB to monitor shared library events but GDB misinterprets this SIGTRAP and reports a signal. Change handle_swbp() to use force_sig(SIGTRAP), this matches do_int3_user() and fixes the problem. This is the minimal fix for -stable, arch/x86/kernel/uprobes.c is equally wrong; it should use send_sigtrap(TRAP_TRACE) instead of send_sig(SIGTRAP), but this doesn't confuse GDB and needs another x86-specific patch. Reported-by: Aaron Merey Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Reviewed-by: Srikar Dronamraju Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200723154420.GA32043@redhat.com --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index bb0862873dbaa1..5f8b0c52fd2ef6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2199,7 +2199,7 @@ static void handle_swbp(struct pt_regs *regs) if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ - send_sig(SIGTRAP, current, 0); + force_sig(SIGTRAP); } else { /* * Either we raced with uprobe_unregister() or we can't From c2b69f24ebd166a13cdc9909b50f33228895998b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 24 Jul 2020 10:50:22 +1000 Subject: [PATCH 124/131] flow_offload: Move rhashtable inclusion to the source file I noticed that touching linux/rhashtable.h causes lib/vsprintf.c to be rebuilt. This dependency came through a bogus inclusion in the file net/flow_offload.h. This patch moves it to the right place. This patch also removes a lingering rhashtable inclusion in cls_api created by the same commit. Fixes: 4e481908c51b ("flow_offload: move tc indirect block to...") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/flow_offload.h | 1 - net/core/flow_offload.c | 1 + net/sched/cls_api.c | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 6315324b9dc2c4..3eaf25f68b7957 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -5,7 +5,6 @@ #include #include #include -#include struct flow_match { struct flow_dissector *dissector; diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index b739cfab796e49..2076219b8ba582 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -4,6 +4,7 @@ #include #include #include +#include struct flow_rule *flow_rule_alloc(unsigned int num_actions) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e62beec0d84405..4619cb3cb0a8f5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include From c2c633106453611be07821f53dff9e93a9d1c3f0 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 24 Jul 2020 10:59:10 +0200 Subject: [PATCH 125/131] xen-netfront: fix potential deadlock in xennet_remove() There's a potential race in xennet_remove(); this is what the driver is doing upon unregistering a network device: 1. state = read bus state 2. if state is not "Closed": 3. request to set state to "Closing" 4. wait for state to be set to "Closing" 5. request to set state to "Closed" 6. wait for state to be set to "Closed" If the state changes to "Closed" immediately after step 1 we are stuck forever in step 4, because the state will never go back from "Closed" to "Closing". Make sure to check also for state == "Closed" in step 4 to prevent the deadlock. Also add a 5 sec timeout any time we wait for the bus state to change, to avoid getting stuck forever in wait_event(). Signed-off-by: Andrea Righi Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 64 +++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 482c6c8b0fb7e0..88280057e03217 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -63,6 +63,8 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644); MODULE_PARM_DESC(max_queues, "Maximum number of queues per virtual interface"); +#define XENNET_TIMEOUT (5 * HZ) + static const struct ethtool_ops xennet_ethtool_ops; struct netfront_cb { @@ -1334,12 +1336,15 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); - xenbus_switch_state(dev, XenbusStateInitialising); - wait_event(module_wq, - xenbus_read_driver_state(dev->otherend) != - XenbusStateClosed && - xenbus_read_driver_state(dev->otherend) != - XenbusStateUnknown); + do { + xenbus_switch_state(dev, XenbusStateInitialising); + err = wait_event_timeout(module_wq, + xenbus_read_driver_state(dev->otherend) != + XenbusStateClosed && + xenbus_read_driver_state(dev->otherend) != + XenbusStateUnknown, XENNET_TIMEOUT); + } while (!err); + return netdev; exit: @@ -2139,28 +2144,43 @@ static const struct attribute_group xennet_dev_group = { }; #endif /* CONFIG_SYSFS */ -static int xennet_remove(struct xenbus_device *dev) +static void xennet_bus_close(struct xenbus_device *dev) { - struct netfront_info *info = dev_get_drvdata(&dev->dev); - - dev_dbg(&dev->dev, "%s\n", dev->nodename); + int ret; - if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { + if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed) + return; + do { xenbus_switch_state(dev, XenbusStateClosing); - wait_event(module_wq, - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosing || - xenbus_read_driver_state(dev->otherend) == - XenbusStateUnknown); + ret = wait_event_timeout(module_wq, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosing || + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown, + XENNET_TIMEOUT); + } while (!ret); + + if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed) + return; + do { xenbus_switch_state(dev, XenbusStateClosed); - wait_event(module_wq, - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosed || - xenbus_read_driver_state(dev->otherend) == - XenbusStateUnknown); - } + ret = wait_event_timeout(module_wq, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown, + XENNET_TIMEOUT); + } while (!ret); +} + +static int xennet_remove(struct xenbus_device *dev) +{ + struct netfront_info *info = dev_get_drvdata(&dev->dev); + xennet_bus_close(dev); xennet_disconnect_backend(info); if (info->netdev->reg_state == NETREG_REGISTERED) From af9f691f0f5bdd1ade65a7b84927639882d7c3e5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 24 Jul 2020 09:45:51 -0700 Subject: [PATCH 126/131] qrtr: orphan socket in qrtr_release() We have to detach sock from socket in qrtr_release(), otherwise skb->sk may still reference to this socket when the skb is released in tun->queue, particularly sk->sk_wq still points to &sock->wq, which leads to a UAF. Reported-and-tested-by: syzbot+6720d64f31c081c2f708@syzkaller.appspotmail.com Fixes: 28fb4e59a47d ("net: qrtr: Expose tunneling endpoint to user space") Cc: Bjorn Andersson Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 24a8c3c6da0dca..300a104b9a0fb0 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1180,6 +1180,7 @@ static int qrtr_release(struct socket *sock) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); + sock_orphan(sk); sock->sk = NULL; if (!sock_flag(sk, SOCK_ZAPPED)) From d0d8aae64566b753c4330fbd5944b88af035f299 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 15 Jul 2020 16:30:07 -0700 Subject: [PATCH 127/131] RISC-V: Set maximum number of mapped pages correctly Currently, maximum number of mapper pages are set to the pfn calculated from the memblock size of the memblock containing kernel. This will work until that memblock spans the entire memory. However, it will be set to a wrong value if there are multiple memblocks defined in kernel (e.g. with efi runtime services). Set the the maximum value to the pfn calculated from dram size. Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f4adb3684f3db8..8d22973bde4065 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -150,9 +150,9 @@ void __init setup_bootmem(void) /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - set_max_mapnr(PFN_DOWN(mem_size)); max_pfn = PFN_DOWN(memblock_end_of_DRAM()); max_low_pfn = max_pfn; + set_max_mapnr(max_low_pfn); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); From 7df5cb75cfb8acf96c7f2342530eb41e0c11f4c3 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 23 Jul 2020 11:31:48 -0600 Subject: [PATCH 128/131] dev: Defer free of skbs in flush_backlog IRQs are disabled when freeing skbs in input queue. Use the IRQ safe variant to free skbs here. Fixes: 145dd5f9c88f ("net: flush the softnet backlog in process context") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 90b59fc50dc9c9..7a774ebf64e26d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5601,7 +5601,7 @@ static void flush_backlog(struct work_struct *work) skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - kfree_skb(skb); + dev_kfree_skb_irq(skb); input_queue_head_incr(sd); } } From 8754e1379e7089516a449821f88e1fe1ebbae5e1 Mon Sep 17 00:00:00 2001 From: Xie He Date: Fri, 24 Jul 2020 09:33:47 -0700 Subject: [PATCH 129/131] drivers/net/wan: lapb: Corrected the usage of skb_cow This patch fixed 2 issues with the usage of skb_cow in LAPB drivers "lapbether" and "hdlc_x25": 1) After skb_cow fails, kfree_skb should be called to drop a reference to the skb. But in both drivers, kfree_skb is not called. 2) skb_cow should be called before skb_push so that is can ensure the safety of skb_push. But in "lapbether", it is incorrectly called after skb_push. More details about these 2 issues: 1) The behavior of calling kfree_skb on failure is also the behavior of netif_rx, which is called by this function with "return netif_rx(skb);". So this function should follow this behavior, too. 2) In "lapbether", skb_cow is called after skb_push. This results in 2 logical issues: a) skb_push is not protected by skb_cow; b) An extra headroom of 1 byte is ensured after skb_push. This extra headroom has no use in this function. It also has no use in the upper-layer function that this function passes the skb to (x25_lapb_receive_frame in net/x25/x25_dev.c). So logically skb_cow should instead be called before skb_push. Cc: Eric Dumazet Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_x25.c | 4 +++- drivers/net/wan/lapbether.c | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index c84536b03aa84e..f70336bb6f5244 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -71,8 +71,10 @@ static int x25_data_indication(struct net_device *dev, struct sk_buff *skb) { unsigned char *ptr; - if (skb_cow(skb, 1)) + if (skb_cow(skb, 1)) { + kfree_skb(skb); return NET_RX_DROP; + } skb_push(skb, 1); skb_reset_network_header(skb); diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 284832314f3109..b2868433718f61 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -128,10 +128,12 @@ static int lapbeth_data_indication(struct net_device *dev, struct sk_buff *skb) { unsigned char *ptr; - skb_push(skb, 1); - - if (skb_cow(skb, 1)) + if (skb_cow(skb, 1)) { + kfree_skb(skb); return NET_RX_DROP; + } + + skb_push(skb, 1); ptr = skb->data; *ptr = X25_IFACE_DATA; From 4400231c8acc7e513204c8470c6d796ba47dc169 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 15 Jul 2020 16:30:08 -0700 Subject: [PATCH 130/131] RISC-V: Do not rely on initrd_start/end computed during early dt parsing Currently, initrd_start/end are computed during early_init_dt_scan but used during arch_setup. We will get the following panic if initrd is used and CONFIG_DEBUG_VIRTUAL is turned on. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] kernel BUG at arch/riscv/mm/physaddr.c:33! [ 0.000000] Kernel BUG [#1] [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-rc4-00015-ged0b226fed02 #886 [ 0.000000] epc: ffffffe0002058d2 ra : ffffffe0000053f0 sp : ffffffe001001f40 [ 0.000000] gp : ffffffe00106e250 tp : ffffffe001009d40 t0 : ffffffe00107ee28 [ 0.000000] t1 : 0000000000000000 t2 : ffffffe000a2e880 s0 : ffffffe001001f50 [ 0.000000] s1 : ffffffe0001383e8 a0 : ffffffe00c087e00 a1 : 0000000080200000 [ 0.000000] a2 : 00000000010bf000 a3 : ffffffe00106f3c8 a4 : ffffffe0010bf000 [ 0.000000] a5 : ffffffe000000000 a6 : 0000000000000006 a7 : 0000000000000001 [ 0.000000] s2 : ffffffe00106f068 s3 : ffffffe00106f070 s4 : 0000000080200000 [ 0.000000] s5 : 0000000082200000 s6 : 0000000000000000 s7 : 0000000000000000 [ 0.000000] s8 : 0000000080011010 s9 : 0000000080012700 s10: 0000000000000000 [ 0.000000] s11: 0000000000000000 t3 : 000000000001fe30 t4 : 000000000001fe30 [ 0.000000] t5 : 0000000000000000 t6 : ffffffe00107c471 [ 0.000000] status: 0000000000000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 0.000000] random: get_random_bytes called from print_oops_end_marker+0x22/0x46 with crng_init=0 To avoid the error, initrd_start/end can be computed from phys_initrd_start/size in setup itself. It also improves the initrd placement by aligning the start and size with the page size. Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code") Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8d22973bde4065..f818a47a72d14b 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -95,19 +95,40 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD static void __init setup_initrd(void) { + phys_addr_t start; unsigned long size; - if (initrd_start >= initrd_end) { - pr_info("initrd not found or empty"); + /* Ignore the virtul address computed during device tree parsing */ + initrd_start = initrd_end = 0; + + if (!phys_initrd_size) + return; + /* + * Round the memory region to page boundaries as per free_initrd_mem() + * This allows us to detect whether the pages overlapping the initrd + * are in use, but more importantly, reserves the entire set of pages + * as we don't want these pages allocated for other purposes. + */ + start = round_down(phys_initrd_start, PAGE_SIZE); + size = phys_initrd_size + (phys_initrd_start - start); + size = round_up(size, PAGE_SIZE); + + if (!memblock_is_region_memory(start, size)) { + pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region", + (u64)start, size); goto disable; } - if (__pa_symbol(initrd_end) > PFN_PHYS(max_low_pfn)) { - pr_err("initrd extends beyond end of memory"); + + if (memblock_is_region_reserved(start, size)) { + pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n", + (u64)start, size); goto disable; } - size = initrd_end - initrd_start; - memblock_reserve(__pa_symbol(initrd_start), size); + memblock_reserve(start, size); + /* Now convert initrd to virtual addresses */ + initrd_start = (unsigned long)__va(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; initrd_below_start_ok = 1; pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", From fa5a198359053c8e21dcc2b39c0e13871059bc9f Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 15 Jul 2020 16:30:09 -0700 Subject: [PATCH 131/131] riscv: Parse all memory blocks to remove unusable memory Currently, maximum physical memory allowed is equal to -PAGE_OFFSET. That's why we remove any memory blocks spanning beyond that size. However, it is done only for memblock containing linux kernel which will not work if there are multiple memblocks. Process all memory blocks to figure out how much memory needs to be removed and remove at the end instead of updating the memblock list in place. Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f818a47a72d14b..79e9d55bdf1ac4 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -147,26 +147,29 @@ void __init setup_bootmem(void) { struct memblock_region *reg; phys_addr_t mem_size = 0; + phys_addr_t total_mem = 0; + phys_addr_t mem_start, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); /* Find the memory region containing the kernel */ for_each_memblock(memory, reg) { - phys_addr_t end = reg->base + reg->size; - - if (reg->base <= vmlinux_start && vmlinux_end <= end) { - mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET); - - /* - * Remove memblock from the end of usable area to the - * end of region - */ - if (reg->base + mem_size < end) - memblock_remove(reg->base + mem_size, - end - reg->base - mem_size); - } + end = reg->base + reg->size; + if (!total_mem) + mem_start = reg->base; + if (reg->base <= vmlinux_start && vmlinux_end <= end) + BUG_ON(reg->size == 0); + total_mem = total_mem + reg->size; } - BUG_ON(mem_size == 0); + + /* + * Remove memblock from the end of usable area to the + * end of region + */ + mem_size = min(total_mem, (phys_addr_t)-PAGE_OFFSET); + if (mem_start + mem_size < end) + memblock_remove(mem_start + mem_size, + end - mem_start - mem_size); /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);