From fc0f7e3317c5f406e6d5520209b4689a4ffecfdf Mon Sep 17 00:00:00 2001 From: Manfred Schlaegl Date: Mon, 6 Jun 2016 10:47:47 +0200 Subject: [PATCH 01/22] net: phy: smsc: reintroduced unconditional soft reset We detected some problems using the smsc lan8720a in combination with i.MX28 and tracked this down to commit 21009686662f ("net: phy: smsc: move smsc_phy_config_init reset part in a soft_reset function") With 2100968666 the generic soft reset is replaced by a specific function which handles power down state correctly. But additionally the soft reset itself got conditional and is therefore also only performed if the phy is in power down state. This patch keeps the conditional wake up from power down, but re-introduces the unconditional soft reset using the generic soft reset function. It was tested on linux-4.1.25 and linux-4.7.0-rc2. Signed-off-by: Manfred Schlaegl Signed-off-by: David S. Miller --- drivers/net/phy/smsc.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 2e21e9366f76a1..b62c4aaee40bd8 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -75,22 +75,13 @@ static int smsc_phy_reset(struct phy_device *phydev) * in all capable mode before using it. */ if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) { - int timeout = 50000; - - /* set "all capable" mode and reset the phy */ + /* set "all capable" mode */ rc |= MII_LAN83C185_MODE_ALL; phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc); - phy_write(phydev, MII_BMCR, BMCR_RESET); - - /* wait end of reset (max 500 ms) */ - do { - udelay(10); - if (timeout-- == 0) - return -1; - rc = phy_read(phydev, MII_BMCR); - } while (rc & BMCR_RESET); } - return 0; + + /* reset the phy */ + return genphy_soft_reset(phydev); } static int lan911x_config_init(struct phy_device *phydev) From 56fae404fb2c306db0a35dad0d16fa24c65678f3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 7 Jun 2016 12:06:58 +0300 Subject: [PATCH 02/22] bridge: Fix incorrect re-injection of STP packets Commit 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") fixed incorrect usage of NF_HOOK's return value by consuming packets in okfn via br_pass_frame_up(). However, this function re-injects packets to the Rx path with skb->dev set to the bridge device, which breaks kernel's STP, as all STP packets appear to originate from the bridge device itself. Instead, if STP is enabled and bridge isn't a 802.1ad bridge, then learn packet's SMAC and inject it back to the Rx path for further processing by the packet handlers. The patch also makes netfilter's behavior consistent with regards to packets destined to the Bridge Group Address, as no hook registered at LOCAL_IN will ever be called, regardless if STP is enabled or not. Cc: Florian Westphal Cc: Shmulik Ladkani Cc: Toshiaki Makita Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/bridge/br_input.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 16079772222860..43d2cd862bc260 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -213,8 +213,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } EXPORT_SYMBOL_GPL(br_handle_frame_finish); -/* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +static void __br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -222,6 +221,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu /* check if vlan is allowed, to avoid spoofing */ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); +} + +/* note: already called with rcu_read_lock */ +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + + __br_handle_local_finish(skb); BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; br_pass_frame_up(skb); @@ -274,7 +281,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (p->br->stp_enabled == BR_NO_STP || fwd_mask & (1u << dest[5])) goto forward; - break; + *pskb = skb; + __br_handle_local_finish(skb); + return RX_HANDLER_PASS; case 0x01: /* IEEE MAC (Pause) */ goto drop; From c3ec5e5ce9cea1f369a5a8ad69d6471680796bc6 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 9 Jun 2016 18:05:09 +0100 Subject: [PATCH 03/22] net: diag: add missing declarations The functions inet_diag_msg_common_fill and inet_diag_msg_attrs_fill seem to have been missed from the include/linux/inet_diag.h header file. Add them to fix the following warnings: net/ipv4/inet_diag.c:69:6: warning: symbol 'inet_diag_msg_common_fill' was not declared. Should it be static? net/ipv4/inet_diag.c:108:5: warning: symbol 'inet_diag_msg_attrs_fill' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 6 ++++++ net/sctp/sctp_diag.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 7c27fa1030e873..feb04ea20f11cb 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -52,6 +52,12 @@ struct sock *inet_diag_find_one_icsk(struct net *net, int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); + +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns); + extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* _INET_DIAG_H_ */ diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 1ce724b87618a0..f69edcf219e514 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -3,12 +3,6 @@ #include #include -extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, - struct sock *sk); -extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); - static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info); From 0b392be9a86560dae3af2e7528f226ff465ab549 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 10 Jun 2016 12:11:06 +0100 Subject: [PATCH 04/22] net: ipconfig: avoid warning by making ic_addrservaddr static The symbol ic_addrservaddr is not static, but has no declaration to match so make it static to fix the following warning: net/ipv4/ipconfig.c:130:8: warning: symbol 'ic_addrservaddr' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2ed9dd2b5f2f5a..eccf9fd190c0be 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,7 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ -__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ __be32 ic_servaddr = NONE; /* Boot server IP address */ From 562c5a70400c75f50d99de631666ac7cc2f03958 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:27:58 +0200 Subject: [PATCH 05/22] net: mediatek: add missing return code check The code fails to check if the scratch memory was properly allocated. Add this check and return with an error if the allocation failed. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4763252bbf8557..11cd7304e49771 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -495,6 +495,9 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE, GFP_KERNEL); + if (unlikely(!eth->scratch_head)) + return -ENOMEM; + dma_addr = dma_map_single(eth->dev, eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE, DMA_FROM_DEVICE); From 605e4fe476956c67ced8518247e6c9601a31b868 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:27:59 +0200 Subject: [PATCH 06/22] net: mediatek: fix missing free of scratch memory Scratch memory gets allocated in mtk_init_fq_dma() but the corresponding code to free it is missing inside mtk_dma_free() causing a memory leak. With this patch applied, we can run ifconfig up/down several thousand times without any problems. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 18 +++++++++++++----- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 ++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 11cd7304e49771..fb8b17db7fa2b1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -481,14 +481,14 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd, /* the qdma core needs scratch memory to be setup */ static int mtk_init_fq_dma(struct mtk_eth *eth) { - dma_addr_t phy_ring_head, phy_ring_tail; + dma_addr_t phy_ring_tail; int cnt = MTK_DMA_SIZE; dma_addr_t dma_addr; int i; eth->scratch_ring = dma_alloc_coherent(eth->dev, cnt * sizeof(struct mtk_tx_dma), - &phy_ring_head, + ð->phy_scratch_ring, GFP_ATOMIC | __GFP_ZERO); if (unlikely(!eth->scratch_ring)) return -ENOMEM; @@ -505,19 +505,19 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) return -ENOMEM; memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt); - phy_ring_tail = phy_ring_head + + phy_ring_tail = eth->phy_scratch_ring + (sizeof(struct mtk_tx_dma) * (cnt - 1)); for (i = 0; i < cnt; i++) { eth->scratch_ring[i].txd1 = (dma_addr + (i * MTK_QDMA_PAGE_SIZE)); if (i < cnt - 1) - eth->scratch_ring[i].txd2 = (phy_ring_head + + eth->scratch_ring[i].txd2 = (eth->phy_scratch_ring + ((i + 1) * sizeof(struct mtk_tx_dma))); eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE); } - mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD); + mtk_w32(eth, eth->phy_scratch_ring, MTK_QDMA_FQ_HEAD); mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL); mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT); mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN); @@ -1210,6 +1210,14 @@ static void mtk_dma_free(struct mtk_eth *eth) for (i = 0; i < MTK_MAC_COUNT; i++) if (eth->netdev[i]) netdev_reset_queue(eth->netdev[i]); + if (eth->scratch_ring) { + dma_free_coherent(eth->dev, + MTK_DMA_SIZE * sizeof(struct mtk_tx_dma), + eth->scratch_ring, + eth->phy_scratch_ring); + eth->scratch_ring = NULL; + eth->phy_scratch_ring = 0; + } mtk_tx_clean(eth); mtk_rx_clean(eth); kfree(eth->scratch_head); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index eed626d56ea4f1..57f7e8a3dbe29a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -357,6 +357,7 @@ struct mtk_rx_ring { * @rx_ring: Pointer to the memore holding info about the RX ring * @rx_napi: The NAPI struct * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring + * @phy_scratch_ring: physical address of scratch_ring * @scratch_head: The scratch memory that scratch_ring points to. * @clk_ethif: The ethif clock * @clk_esw: The switch clock @@ -384,6 +385,7 @@ struct mtk_eth { struct mtk_rx_ring rx_ring; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; + dma_addr_t phy_scratch_ring; void *scratch_head; struct clk *clk_ethif; struct clk *clk_esw; From 2fae723cefb8bfe712371978288aa0a70b54802e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:00 +0200 Subject: [PATCH 07/22] net: mediatek: invalid buffer lookup in mtk_tx_map() The lookup of the tx_buffer in the error path inside mtk_tx_map() uses the wrong descriptor pointer. This looks like a copy & paste error. Change the code to use the correct pointer. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index fb8b17db7fa2b1..8c3e5438794819 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -674,7 +674,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, err_dma: do { - tx_buf = mtk_desc_to_tx_buf(ring, txd); + tx_buf = mtk_desc_to_tx_buf(ring, itxd); /* unmap dma */ mtk_tx_unmap(&dev->dev, tx_buf); From 94321a9fc9f5b6c6e949cc7c69741538f556ab74 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:01 +0200 Subject: [PATCH 08/22] net: mediatek: dropped rx packets are not being counted properly There are two places inside mtk_poll_rx where rx_dropped is not being incremented properly. Fix this by adding the missing code to increment the counter. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8c3e5438794819..b6d3c217722d22 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -829,6 +829,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) { skb_free_frag(new_data); + netdev->stats.rx_dropped++; goto release_desc; } @@ -836,6 +837,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb = build_skb(data, ring->frag_size); if (unlikely(!skb)) { put_page(virt_to_head_page(new_data)); + netdev->stats.rx_dropped++; goto release_desc; } skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); From 6675086d04e7c0748cd5884f7c8611b5f0836250 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:02 +0200 Subject: [PATCH 09/22] net: mediatek: add next data pointer coherency protection The QDMA engine can fail to update the register pointing to the next TX descriptor if this bit does not get set in the QDMA configuration register. Not setting this bit can result in invalid values inside the TX rings registers which will causes TX stalls. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b6d3c217722d22..f30c2e474b8788 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1282,7 +1282,7 @@ static int mtk_start_dma(struct mtk_eth *eth) mtk_w32(eth, MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN | MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS | - MTK_RX_BT_32DWORDS, + MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO, MTK_QDMA_GLO_CFG); return 0; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 57f7e8a3dbe29a..a5eb7c62306b66 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -91,6 +91,7 @@ #define MTK_QDMA_GLO_CFG 0x1A04 #define MTK_RX_2B_OFFSET BIT(31) #define MTK_RX_BT_32DWORDS (3 << 11) +#define MTK_NDP_CO_PRO BIT(10) #define MTK_TX_WB_DDONE BIT(6) #define MTK_DMA_SIZE_16DWORDS (2 << 4) #define MTK_RX_DMA_BUSY BIT(3) From 2ff0bb61646f286fa97db2904491974302a14f1f Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:03 +0200 Subject: [PATCH 10/22] net: mediatek: disable all interrupts during probe The current code only disables those IRQs that we will later use. To ensure that we have a predefined state, we really want to disable all IRQs. Change the code to disable all IRQs to achieve this. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f30c2e474b8788..e3dadae9f6dd7b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1396,7 +1396,7 @@ static int __init mtk_hw_init(struct mtk_eth *eth) /* disable delay and normal interrupt */ mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); - mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); + mtk_irq_disable(eth, ~0); mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); mtk_w32(eth, 0, MTK_RST_GL); From 04698cccb1de54d5d97fda2e4a1c6ca365da0f70 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:04 +0200 Subject: [PATCH 11/22] net: mediatek: fix threshold value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic to calculate the threshold value for stopping the TX queue is bad. Currently it will always use 1/2 of the rings size, which is way too much. Set the threshold to MAX_SKB_FRAGS. This makes sure that the queue is stopped when there is not enough room to accept an additional segment.  Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e3dadae9f6dd7b..032939d211a7c6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1033,8 +1033,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); ring->next_free = &ring->dma[0]; ring->last_free = &ring->dma[MTK_DMA_SIZE - 2]; - ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2, - MAX_SKB_FRAGS); + ring->thresh = MAX_SKB_FRAGS; /* make sure that all changes to the dma ring are flushed before we * continue From eaadf9fd3f6390f6ecbd0dfbd5997a0486fc9d5e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:05 +0200 Subject: [PATCH 12/22] net: mediatek: increase watchdog_timeo During stress testing, after reducing the threshold value, we have seen TX timeouts that were caused by the watchdog_timeo value being too low. Increase the value to 5 * HZ which is a value commonly used by many other drivers. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 032939d211a7c6..3b3ba2e55bdce0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1709,7 +1709,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET; SET_NETDEV_DEV(eth->netdev[id], eth->dev); - eth->netdev[id]->watchdog_timeo = HZ; + eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; eth->netdev[id]->base_addr = (unsigned long)eth->base; eth->netdev[id]->vlan_features = MTK_HW_FEATURES & From 12c97c13ea7174db5b5dc4a1ef91d4e9245bb569 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:06 +0200 Subject: [PATCH 13/22] net: mediatek: fix off by one in the TX ring allocation The TX ring setup has an off by one error causing it to not utilise all descriptors. This has the side effect that we need to reset the next pointer at runtime to make it work. Fix the off by one and remove the code fixing the ring at runtime. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3b3ba2e55bdce0..c097e9e3926c63 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -926,7 +926,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) } mtk_tx_unmap(eth->dev, tx_buf); - ring->last_free->txd2 = next_cpu; ring->last_free = desc; atomic_inc(&ring->free_count); @@ -1032,7 +1031,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); ring->next_free = &ring->dma[0]; - ring->last_free = &ring->dma[MTK_DMA_SIZE - 2]; + ring->last_free = &ring->dma[MTK_DMA_SIZE - 1]; ring->thresh = MAX_SKB_FRAGS; /* make sure that all changes to the dma ring are flushed before we From ad3cba989e8b1bbefe078eece29f0e8d8aaea1d6 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:07 +0200 Subject: [PATCH 14/22] net: mediatek: only wake the queue if it is stopped The current code unconditionally wakes up the queue at the end of each tx_poll action. Change the code to only wake up the queues if any of them have actually been stopped before. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c097e9e3926c63..40e37b158a690f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -704,6 +704,20 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb) return nfrags; } +static int mtk_queue_stopped(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->netdev[i]) + continue; + if (netif_queue_stopped(eth->netdev[i])) + return 1; + } + + return 0; +} + static void mtk_wake_queue(struct mtk_eth *eth) { int i; @@ -950,7 +964,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) if (!total) return 0; - if (atomic_read(&ring->free_count) > ring->thresh) + if (mtk_queue_stopped(eth) && + (atomic_read(&ring->free_count) > ring->thresh)) mtk_wake_queue(eth); return total; From 82c6544dddc6c4bd940917af2f987dee6be0fc17 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:08 +0200 Subject: [PATCH 15/22] net: mediatek: remove superfluous queue wake up call The code checks if the queue should be stopped because we are below the threshold of free descriptors only to check if it should be started again. If we do end up in a state where we are at the threshold limit, it makes more sense to just stop the queue and wait for the next IRQ to trigger the TX housekeeping again. There is no rush in enqueuing the next packet, it needs to wait for all the others in the queue to be dispatched first anyway. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 40e37b158a690f..d1cdc2d7615158 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -783,12 +783,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0) goto drop; - if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) { + if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) mtk_stop_queue(eth); - if (unlikely(atomic_read(&ring->free_count) > - ring->thresh)) - mtk_wake_queue(eth); - } + spin_unlock_irqrestore(ð->page_lock, flags); return NETDEV_TX_OK; From 00a8231388d573195a77f6de63b05c4920589a8e Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Fri, 10 Jun 2016 16:53:05 +0200 Subject: [PATCH 16/22] net: au1000_eth: fix PHY detection Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541 ("phy: Add API for {un}registering an mdio device to a bus.") broke PHY detection on this driver with a copy-paste bug: The code is looking 32 times for a PHY at address 0. Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have their (autodetected) PHYs at address 31. Cc: Andrew Lunn Signed-off-by: Manuel Lauss Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e0fb0f1122db42..d8c77e8e25edb1 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -508,13 +508,12 @@ static int au1000_mii_probe(struct net_device *dev) /* find the first (lowest address) PHY * on the current MAC's MII bus */ - for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { - phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); - if (!aup->phy_search_highest_addr) - /* break out with first one found */ - break; - } + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { + phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); + if (phydev && !aup->phy_search_highest_addr) + /* break out with first one found */ + break; + } if (aup->phy1_search_mac0) { /* try harder to find a PHY */ From 3ab51f9f08106467e705fbbd7d39447e14a40e98 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 10 Jun 2016 23:34:24 -0700 Subject: [PATCH 17/22] Revert "net: au1000_eth: fix PHY detection" This reverts commit a2f27217e4e60e663b5b971b0ccb287a9548b04e. I applied the wrong version of this. Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index d8c77e8e25edb1..e0fb0f1122db42 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -508,12 +508,13 @@ static int au1000_mii_probe(struct net_device *dev) /* find the first (lowest address) PHY * on the current MAC's MII bus */ - for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { - phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); - if (phydev && !aup->phy_search_highest_addr) - /* break out with first one found */ - break; - } + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) + if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); + if (!aup->phy_search_highest_addr) + /* break out with first one found */ + break; + } if (aup->phy1_search_mac0) { /* try harder to find a PHY */ From 5b22d3747756a3e9bceac1acb76304faa0b38680 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Sat, 11 Jun 2016 00:13:04 +0200 Subject: [PATCH 18/22] net: au1000_eth: fix PHY detection Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541 ("phy: Add API for {un}registering an mdio device to a bus.") broke PHY detection on this driver with a copy-paste bug: The code is looking 32 times for a PHY at address 0. Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have their (autodetected) PHYs at address 31. Cc: Andrew Lunn Signed-off-by: Manuel Lauss Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e0fb0f1122db42..20760e10211a48 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -509,8 +509,8 @@ static int au1000_mii_probe(struct net_device *dev) * on the current MAC's MII bus */ for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { - phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); + if (mdiobus_get_phy(aup->mii_bus, phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); if (!aup->phy_search_highest_addr) /* break out with first one found */ break; From 600a28b9e5c0bd694212248aabb8bdc655837a63 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Jun 2016 20:40:24 -0700 Subject: [PATCH 19/22] ipconfig: Protect ic_addrservaddr with IPCONFIG_DYNAMIC. >> net/ipv4/ipconfig.c:130:15: warning: 'ic_addrservaddr' defined but not used [-Wunused-variable] static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ Reported-by: kbuild test robot Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index eccf9fd190c0be..1d71c40eaaf35a 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ +#ifdef IPCONFIG_DYNAMIC static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#endif __be32 ic_servaddr = NONE; /* Boot server IP address */ From d8fafc04da8f46e0299a4ae4aa40cc17b87b5f9f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Jun 2016 16:21:47 -0700 Subject: [PATCH 20/22] net_sched: prio: properly report out of memory errors At Qdisc creation or change time, prio_tune() creates missing pfifo qdiscs but does not return an error code if one qdisc could not be allocated. Leaving a qdisc in non operational state without telling user anything about this problem is not good. Also, testing if we replace something different than noop_qdisc a second time makes no sense so I removed useless code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4b0a82191bc401..071718bccdab65 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -202,26 +202,18 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) sch_tree_unlock(sch); for (i = 0; i < q->bands; i++) { - if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child, *old; - - child = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (child) { - sch_tree_lock(sch); - old = q->queues[i]; - q->queues[i] = child; - - if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); - qdisc_destroy(old); - } - sch_tree_unlock(sch); - } - } + struct Qdisc *child; + + if (q->queues[i] != &noop_qdisc) + continue; + + child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!child) + return -ENOMEM; + sch_tree_lock(sch); + q->queues[i] = child; + sch_tree_unlock(sch); } return 0; } From f4bbb2c4e3188a44bf4a45c7ca621586f9223b30 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Sat, 11 Jun 2016 01:11:54 +0300 Subject: [PATCH 21/22] net: ethernet: ti: cpsw: use destroy ctlr to destroy channels There is no reason to destroy channels that are destroyed while cpdma_ctlr destroy. In this case no need to remember how much channels where created and destroy them by one, as cpdma_ctlr destroys all of them. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index e6bb0ecb12c74c..53190894f17a04 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2505,8 +2505,6 @@ static int cpsw_probe(struct platform_device *pdev) clean_ale_ret: cpsw_ale_destroy(priv->ale); clean_dma_ret: - cpdma_chan_destroy(priv->txch); - cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); @@ -2534,8 +2532,6 @@ static int cpsw_remove(struct platform_device *pdev) unregister_netdev(ndev); cpsw_ale_destroy(priv->ale); - cpdma_chan_destroy(priv->txch); - cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); pm_runtime_disable(&pdev->dev); device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device); From bbbbac7ece6614c593686a35c9b22a129153feb1 Mon Sep 17 00:00:00 2001 From: Daniel Metz Date: Fri, 10 Jun 2016 10:34:52 +0200 Subject: [PATCH 22/22] tcp: use RFC6298 compliant TCP RTO calculation This patch adjusts Linux RTO calculation to be RFC6298 Standard compliant. MinRTO is no longer added to the computed RTO, RTO damping and overestimation are decreased. In RFC 6298 Standard TCP Retransmission Timeout (RTO) calculation the calculated RTO is rounded up to the Minimum RTO (MinRTO), if it is less. The Linux implementation as a discrepancy to the Standard basically adds the defined MinRTO to the calculated RTO. When comparing both approaches, the Linux calculation seems to perform worse for sender limited TCP flows like Telnet, SSH or constant bit rate encoded transmissions, especially for Round Trip Times (RTT) of 50ms to 800ms. Compared to the Linux implementation the RFC 6298 proposed RTO calculation performs better and more precise in adapting to current network characteristics. Extensive measurements for bulk data did not show a negative impact of the adjusted calculation. Using the RFC 6298 compliant implementation, the tcp_sock struct variable u32 mdev_max_us becomes obsolete and consequently is being removed. Exemplary performance comparison for sender-limited-flows: - Rate: 10Mbit/s - Delay: 200ms, Delay Variation: 10ms - Time between each scheduled segment: 1s - Amount Data Segments: 300 - Mean of 11 runs Mean Response Waiting Time [milliseconds] PER [%] | 0.5 1 1.5 2 3 5 7 10 --------+------------------------------------------------------- old | 206.4 208.6 218.0 218.6 227.2 249.3 274.7 308.2 new | 203.9 206.0 207.0 209.9 217.3 225.6 238.7 259.1 Detailed analysis: https://docs.google.com/document/d/1pKmPfnQb6fDK4qpiNVkN8cQyGE4wYDZukcuZfR- BnnM/ Reasoning for historic design: Sarolahti, P.; Kuznetsov, A. (2002). Congestion Control in Linux TCP. Conference Paper. Proceedings of the FREENIX Track. 2002 USENIX Annual https://www.cs.helsinki.fi/research/iwtcp/papers/linuxtcp.pdf Signed-off-by: Hagen Paul Pfeifer Signed-off-by: Daniel Metz Cc: Eric Dumazet Cc: Yuchung Cheng bla --- include/linux/tcp.h | 1 - net/ipv4/tcp_input.c | 74 ++++++++++-------------------------------- net/ipv4/tcp_metrics.c | 2 +- 3 files changed, 18 insertions(+), 59 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7be9b124235486..d1790c565d0ad1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -231,7 +231,6 @@ struct tcp_sock { /* RTT measurement */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ - u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ struct rtt_meas { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d6c8f4cd080001..252a1843cdfeb5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -680,8 +680,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge - * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 - * piece by Van Jacobson. + * Proceedings SIGCOMM 87]. * NOTE: the next three routines used to be one big routine. * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics @@ -692,59 +691,21 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) long m = mrtt_us; /* RTT */ u32 srtt = tp->srtt_us; - /* The following amusing code comes from Jacobson's - * article in SIGCOMM '88. Note that rtt and mdev - * are scaled versions of rtt and mean deviation. - * This is designed to be as fast as possible - * m stands for "measurement". - * - * On a 1990 paper the rto value is changed to: - * RTO = rtt + 4 * mdev - * - * Funny. This algorithm seems to be very broken. - * These formulae increase RTO, when it should be decreased, increase - * too slowly, when it should be increased quickly, decrease too quickly - * etc. I guess in BSD RTO takes ONE value, so that it is absolutely - * does not matter how to _calculate_ it. Seems, it was trap - * that VJ failed to avoid. 8) - */ if (srtt != 0) { - m -= (srtt >> 3); /* m is now error in rtt est */ - srtt += m; /* rtt = 7/8 rtt + 1/8 new */ - if (m < 0) { - m = -m; /* m is now abs(error) */ - m -= (tp->mdev_us >> 2); /* similar update on mdev */ - /* This is similar to one of Eifel findings. - * Eifel blocks mdev updates when rtt decreases. - * This solution is a bit different: we use finer gain - * for mdev in this case (alpha*beta). - * Like Eifel it also prevents growth of rto, - * but also it limits too fast rto decreases, - * happening in pure Eifel. - */ - if (m > 0) - m >>= 3; - } else { - m -= (tp->mdev_us >> 2); /* similar update on mdev */ - } - tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ - if (tp->mdev_us > tp->mdev_max_us) { - tp->mdev_max_us = tp->mdev_us; - if (tp->mdev_max_us > tp->rttvar_us) - tp->rttvar_us = tp->mdev_max_us; - } - if (after(tp->snd_una, tp->rtt_seq)) { - if (tp->mdev_max_us < tp->rttvar_us) - tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2; + m -= (srtt >> 3); /* m' = m - srtt / 8 = (R' - SRTT) */ + srtt += m; /* srtt = srtt + m’ = srtt + m - srtt / 8 */ + if (m < 0) + m = -m; + m -= (tp->mdev_us >> 2); /* m'' = |m'| - mdev / 4 */ + tp->mdev_us += m; + tp->rttvar_us = tp->mdev_us; + if (after(tp->snd_una, tp->rtt_seq)) tp->rtt_seq = tp->snd_nxt; - tp->mdev_max_us = tcp_rto_min_us(sk); - } } else { /* no previous measure. */ - srtt = m << 3; /* take the measured time to be rtt */ - tp->mdev_us = m << 1; /* make sure rto = 3*rtt */ - tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk)); - tp->mdev_max_us = tp->rttvar_us; + srtt = m << 3; + tp->mdev_us = m << 1; + tp->rttvar_us = tp->mdev_us; tp->rtt_seq = tp->snd_nxt; } tp->srtt_us = max(1U, srtt); @@ -798,6 +759,7 @@ static void tcp_update_pacing_rate(struct sock *sk) */ static void tcp_set_rto(struct sock *sk) { + const u32 min_rto = tcp_rto_min_us(sk); const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) * @@ -809,17 +771,15 @@ static void tcp_set_rto(struct sock *sk) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ - inet_csk(sk)->icsk_rto = __tcp_set_rto(tp); - + if (((tp->srtt_us >> 3) + tp->rttvar_us) < min_rto) + inet_csk(sk)->icsk_rto = usecs_to_jiffies(min_rto); + else + inet_csk(sk)->icsk_rto = __tcp_set_rto(tp); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, * all the algo is pure shit and should be replaced * with correct one. It is exactly, which we pretend to do. */ - - /* NOTE: clamping at TCP_RTO_MIN is not required, current algo - * guarantees that rto is higher. - */ tcp_bound_rto(sk); } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b617826e24770a..7f59f5b979c704 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -561,7 +561,7 @@ void tcp_init_metrics(struct sock *sk) * retransmission. */ tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK); - tp->mdev_us = tp->mdev_max_us = tp->rttvar_us; + tp->mdev_us = tp->rttvar_us; inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; }