From fc0f7e3317c5f406e6d5520209b4689a4ffecfdf Mon Sep 17 00:00:00 2001
From: Manfred Schlaegl <manfred.schlaegl@ginzinger.com>
Date: Mon, 6 Jun 2016 10:47:47 +0200
Subject: [PATCH 01/22] net: phy: smsc: reintroduced unconditional soft reset

We detected some problems using the smsc lan8720a in combination with
i.MX28 and tracked this down to commit 21009686662f ("net: phy: smsc: move
smsc_phy_config_init reset part in a soft_reset function")
With 2100968666 the generic soft reset is replaced by a specific function
which handles power down state correctly. But additionally the soft reset
itself got conditional and is therefore also only performed if the phy is
in power down state.

This patch keeps the conditional wake up from power down, but
re-introduces the unconditional soft reset using the generic soft reset
function.
It was tested on linux-4.1.25 and linux-4.7.0-rc2.

Signed-off-by: Manfred Schlaegl <manfred.schlaegl@ginzinger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/smsc.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 2e21e9366f76a1..b62c4aaee40bd8 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -75,22 +75,13 @@ static int smsc_phy_reset(struct phy_device *phydev)
 	 * in all capable mode before using it.
 	 */
 	if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) {
-		int timeout = 50000;
-
-		/* set "all capable" mode and reset the phy */
+		/* set "all capable" mode */
 		rc |= MII_LAN83C185_MODE_ALL;
 		phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc);
-		phy_write(phydev, MII_BMCR, BMCR_RESET);
-
-		/* wait end of reset (max 500 ms) */
-		do {
-			udelay(10);
-			if (timeout-- == 0)
-				return -1;
-			rc = phy_read(phydev, MII_BMCR);
-		} while (rc & BMCR_RESET);
 	}
-	return 0;
+
+	/* reset the phy */
+	return genphy_soft_reset(phydev);
 }
 
 static int lan911x_config_init(struct phy_device *phydev)

From 56fae404fb2c306db0a35dad0d16fa24c65678f3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 7 Jun 2016 12:06:58 +0300
Subject: [PATCH 02/22] bridge: Fix incorrect re-injection of STP packets

Commit 8626c56c8279 ("bridge: fix potential use-after-free when hook
returns QUEUE or STOLEN verdict") fixed incorrect usage of NF_HOOK's
return value by consuming packets in okfn via br_pass_frame_up().

However, this function re-injects packets to the Rx path with skb->dev
set to the bridge device, which breaks kernel's STP, as all STP packets
appear to originate from the bridge device itself.

Instead, if STP is enabled and bridge isn't a 802.1ad bridge, then learn
packet's SMAC and inject it back to the Rx path for further processing
by the packet handlers.

The patch also makes netfilter's behavior consistent with regards to
packets destined to the Bridge Group Address, as no hook registered at
LOCAL_IN will ever be called, regardless if STP is enabled or not.

Cc: Florian Westphal <fw@strlen.de>
Cc: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Cc: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 16079772222860..43d2cd862bc260 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -213,8 +213,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 }
 EXPORT_SYMBOL_GPL(br_handle_frame_finish);
 
-/* note: already called with rcu_read_lock */
-static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static void __br_handle_local_finish(struct sk_buff *skb)
 {
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	u16 vid = 0;
@@ -222,6 +221,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu
 	/* check if vlan is allowed, to avoid spoofing */
 	if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
 		br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
+}
+
+/* note: already called with rcu_read_lock */
+static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
+
+	__br_handle_local_finish(skb);
 
 	BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
 	br_pass_frame_up(skb);
@@ -274,7 +281,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 			if (p->br->stp_enabled == BR_NO_STP ||
 			    fwd_mask & (1u << dest[5]))
 				goto forward;
-			break;
+			*pskb = skb;
+			__br_handle_local_finish(skb);
+			return RX_HANDLER_PASS;
 
 		case 0x01:	/* IEEE MAC (Pause) */
 			goto drop;

From c3ec5e5ce9cea1f369a5a8ad69d6471680796bc6 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Thu, 9 Jun 2016 18:05:09 +0100
Subject: [PATCH 03/22] net: diag: add missing declarations

The functions inet_diag_msg_common_fill and inet_diag_msg_attrs_fill
seem to have been missed from the include/linux/inet_diag.h header
file. Add them to fix the following warnings:

net/ipv4/inet_diag.c:69:6: warning: symbol 'inet_diag_msg_common_fill' was not declared. Should it be static?
net/ipv4/inet_diag.c:108:5: warning: symbol 'inet_diag_msg_attrs_fill' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 6 ++++++
 net/sctp/sctp_diag.c      | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 7c27fa1030e873..feb04ea20f11cb 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -52,6 +52,12 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
+void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
+
+int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
+			     struct inet_diag_msg *r, int ext,
+			     struct user_namespace *user_ns);
+
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
 #endif /* _INET_DIAG_H_ */
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 1ce724b87618a0..f69edcf219e514 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -3,12 +3,6 @@
 #include <linux/sock_diag.h>
 #include <net/sctp/sctp.h>
 
-extern void inet_diag_msg_common_fill(struct inet_diag_msg *r,
-				      struct sock *sk);
-extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
-				    struct inet_diag_msg *r, int ext,
-				    struct user_namespace *user_ns);
-
 static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 			       void *info);
 

From 0b392be9a86560dae3af2e7528f226ff465ab549 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Fri, 10 Jun 2016 12:11:06 +0100
Subject: [PATCH 04/22] net: ipconfig: avoid warning by making ic_addrservaddr
 static

The symbol ic_addrservaddr is not static, but has no declaration
to match so make it static to fix the following warning:

net/ipv4/ipconfig.c:130:8: warning: symbol 'ic_addrservaddr' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 2ed9dd2b5f2f5a..eccf9fd190c0be 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -127,7 +127,7 @@ __be32 ic_myaddr = NONE;		/* My IP address */
 static __be32 ic_netmask = NONE;	/* Netmask for local subnet */
 __be32 ic_gateway = NONE;	/* Gateway IP address */
 
-__be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
+static __be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
 
 __be32 ic_servaddr = NONE;	/* Boot server IP address */
 

From 562c5a70400c75f50d99de631666ac7cc2f03958 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:27:58 +0200
Subject: [PATCH 05/22] net: mediatek: add missing return code check

The code fails to check if the scratch memory was properly allocated. Add
this check and return with an error if the allocation failed.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 4763252bbf8557..11cd7304e49771 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -495,6 +495,9 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 
 	eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE,
 				    GFP_KERNEL);
+	if (unlikely(!eth->scratch_head))
+		return -ENOMEM;
+
 	dma_addr = dma_map_single(eth->dev,
 				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
 				  DMA_FROM_DEVICE);

From 605e4fe476956c67ced8518247e6c9601a31b868 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:27:59 +0200
Subject: [PATCH 06/22] net: mediatek: fix missing free of scratch memory

Scratch memory gets allocated in mtk_init_fq_dma() but the corresponding
code to free it is missing inside mtk_dma_free() causing a memory leak.
With this patch applied, we can run ifconfig up/down several thousand
times without any problems.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 18 +++++++++++++-----
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |  2 ++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 11cd7304e49771..fb8b17db7fa2b1 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -481,14 +481,14 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
 /* the qdma core needs scratch memory to be setup */
 static int mtk_init_fq_dma(struct mtk_eth *eth)
 {
-	dma_addr_t phy_ring_head, phy_ring_tail;
+	dma_addr_t phy_ring_tail;
 	int cnt = MTK_DMA_SIZE;
 	dma_addr_t dma_addr;
 	int i;
 
 	eth->scratch_ring = dma_alloc_coherent(eth->dev,
 					       cnt * sizeof(struct mtk_tx_dma),
-					       &phy_ring_head,
+					       &eth->phy_scratch_ring,
 					       GFP_ATOMIC | __GFP_ZERO);
 	if (unlikely(!eth->scratch_ring))
 		return -ENOMEM;
@@ -505,19 +505,19 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 		return -ENOMEM;
 
 	memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
-	phy_ring_tail = phy_ring_head +
+	phy_ring_tail = eth->phy_scratch_ring +
 			(sizeof(struct mtk_tx_dma) * (cnt - 1));
 
 	for (i = 0; i < cnt; i++) {
 		eth->scratch_ring[i].txd1 =
 					(dma_addr + (i * MTK_QDMA_PAGE_SIZE));
 		if (i < cnt - 1)
-			eth->scratch_ring[i].txd2 = (phy_ring_head +
+			eth->scratch_ring[i].txd2 = (eth->phy_scratch_ring +
 				((i + 1) * sizeof(struct mtk_tx_dma)));
 		eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE);
 	}
 
-	mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD);
+	mtk_w32(eth, eth->phy_scratch_ring, MTK_QDMA_FQ_HEAD);
 	mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL);
 	mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT);
 	mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN);
@@ -1210,6 +1210,14 @@ static void mtk_dma_free(struct mtk_eth *eth)
 	for (i = 0; i < MTK_MAC_COUNT; i++)
 		if (eth->netdev[i])
 			netdev_reset_queue(eth->netdev[i]);
+	if (eth->scratch_ring) {
+		dma_free_coherent(eth->dev,
+				  MTK_DMA_SIZE * sizeof(struct mtk_tx_dma),
+				  eth->scratch_ring,
+				  eth->phy_scratch_ring);
+		eth->scratch_ring = NULL;
+		eth->phy_scratch_ring = 0;
+	}
 	mtk_tx_clean(eth);
 	mtk_rx_clean(eth);
 	kfree(eth->scratch_head);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index eed626d56ea4f1..57f7e8a3dbe29a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -357,6 +357,7 @@ struct mtk_rx_ring {
  * @rx_ring:		Pointer to the memore holding info about the RX ring
  * @rx_napi:		The NAPI struct
  * @scratch_ring:	Newer SoCs need memory for a second HW managed TX ring
+ * @phy_scratch_ring:	physical address of scratch_ring
  * @scratch_head:	The scratch memory that scratch_ring points to.
  * @clk_ethif:		The ethif clock
  * @clk_esw:		The switch clock
@@ -384,6 +385,7 @@ struct mtk_eth {
 	struct mtk_rx_ring		rx_ring;
 	struct napi_struct		rx_napi;
 	struct mtk_tx_dma		*scratch_ring;
+	dma_addr_t			phy_scratch_ring;
 	void				*scratch_head;
 	struct clk			*clk_ethif;
 	struct clk			*clk_esw;

From 2fae723cefb8bfe712371978288aa0a70b54802e Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:00 +0200
Subject: [PATCH 07/22] net: mediatek: invalid buffer lookup in mtk_tx_map()

The lookup of the tx_buffer in the error path inside mtk_tx_map() uses the
wrong descriptor pointer. This looks like a copy & paste error. Change the
code to use the correct pointer.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index fb8b17db7fa2b1..8c3e5438794819 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -674,7 +674,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 
 err_dma:
 	do {
-		tx_buf = mtk_desc_to_tx_buf(ring, txd);
+		tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
 		/* unmap dma */
 		mtk_tx_unmap(&dev->dev, tx_buf);

From 94321a9fc9f5b6c6e949cc7c69741538f556ab74 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:01 +0200
Subject: [PATCH 08/22] net: mediatek: dropped rx packets are not being counted
 properly

There are two places inside mtk_poll_rx where rx_dropped is not being
incremented properly. Fix this by adding the missing code to increment
the counter.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8c3e5438794819..b6d3c217722d22 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -829,6 +829,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 					  DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
 			skb_free_frag(new_data);
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 
@@ -836,6 +837,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		skb = build_skb(data, ring->frag_size);
 		if (unlikely(!skb)) {
 			put_page(virt_to_head_page(new_data));
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

From 6675086d04e7c0748cd5884f7c8611b5f0836250 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:02 +0200
Subject: [PATCH 09/22] net: mediatek: add next data pointer coherency
 protection

The QDMA engine can fail to update the register pointing to the next TX
descriptor if this bit does not get set in the QDMA configuration register.
Not setting this bit can result in invalid values inside the TX rings
registers which will causes TX stalls.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b6d3c217722d22..f30c2e474b8788 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1282,7 +1282,7 @@ static int mtk_start_dma(struct mtk_eth *eth)
 	mtk_w32(eth,
 		MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
 		MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
-		MTK_RX_BT_32DWORDS,
+		MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
 		MTK_QDMA_GLO_CFG);
 
 	return 0;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 57f7e8a3dbe29a..a5eb7c62306b66 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -91,6 +91,7 @@
 #define MTK_QDMA_GLO_CFG	0x1A04
 #define MTK_RX_2B_OFFSET	BIT(31)
 #define MTK_RX_BT_32DWORDS	(3 << 11)
+#define MTK_NDP_CO_PRO		BIT(10)
 #define MTK_TX_WB_DDONE		BIT(6)
 #define MTK_DMA_SIZE_16DWORDS	(2 << 4)
 #define MTK_RX_DMA_BUSY		BIT(3)

From 2ff0bb61646f286fa97db2904491974302a14f1f Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:03 +0200
Subject: [PATCH 10/22] net: mediatek: disable all interrupts during probe

The current code only disables those IRQs that we will later use. To
ensure that we have a predefined state, we really want to disable all IRQs.
Change the code to disable all IRQs to achieve this.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index f30c2e474b8788..e3dadae9f6dd7b 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1396,7 +1396,7 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
 
 	/* disable delay and normal interrupt */
 	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
-	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+	mtk_irq_disable(eth, ~0);
 	mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
 	mtk_w32(eth, 0, MTK_RST_GL);
 

From 04698cccb1de54d5d97fda2e4a1c6ca365da0f70 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:04 +0200
Subject: [PATCH 11/22] net: mediatek: fix threshold value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The logic to calculate the threshold value for stopping the TX queue is
bad. Currently it will always use 1/2 of the rings size, which is way too
much. Set the threshold to MAX_SKB_FRAGS. This makes sure that the queue
is stopped when there is not enough room to accept an additional segment. 

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e3dadae9f6dd7b..032939d211a7c6 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1033,8 +1033,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
 	ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
-	ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2,
-			      MAX_SKB_FRAGS);
+	ring->thresh = MAX_SKB_FRAGS;
 
 	/* make sure that all changes to the dma ring are flushed before we
 	 * continue

From eaadf9fd3f6390f6ecbd0dfbd5997a0486fc9d5e Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:05 +0200
Subject: [PATCH 12/22] net: mediatek: increase watchdog_timeo

During stress testing, after reducing the threshold value, we have seen
TX timeouts that were caused by the watchdog_timeo value being too low.
Increase the value to 5 * HZ which is a value commonly used by many other
drivers.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 032939d211a7c6..3b3ba2e55bdce0 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1709,7 +1709,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
-	eth->netdev[id]->watchdog_timeo = HZ;
+	eth->netdev[id]->watchdog_timeo = 5 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &

From 12c97c13ea7174db5b5dc4a1ef91d4e9245bb569 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:06 +0200
Subject: [PATCH 13/22] net: mediatek: fix off by one in the TX ring allocation

The TX ring setup has an off by one error causing it to not utilise all
descriptors. This has the side effect that we need to reset the next
pointer at runtime to make it work. Fix the off by one and remove the
code fixing the ring at runtime.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 3b3ba2e55bdce0..c097e9e3926c63 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -926,7 +926,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 		}
 		mtk_tx_unmap(eth->dev, tx_buf);
 
-		ring->last_free->txd2 = next_cpu;
 		ring->last_free = desc;
 		atomic_inc(&ring->free_count);
 
@@ -1032,7 +1031,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
-	ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
+	ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
 	ring->thresh = MAX_SKB_FRAGS;
 
 	/* make sure that all changes to the dma ring are flushed before we

From ad3cba989e8b1bbefe078eece29f0e8d8aaea1d6 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:07 +0200
Subject: [PATCH 14/22] net: mediatek: only wake the queue if it is stopped

The current code unconditionally wakes up the queue at the end of each
tx_poll action. Change the code to only wake up the queues if any of
them have actually been stopped before.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index c097e9e3926c63..40e37b158a690f 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -704,6 +704,20 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 	return nfrags;
 }
 
+static int mtk_queue_stopped(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		if (netif_queue_stopped(eth->netdev[i]))
+			return 1;
+	}
+
+	return 0;
+}
+
 static void mtk_wake_queue(struct mtk_eth *eth)
 {
 	int i;
@@ -950,7 +964,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 	if (!total)
 		return 0;
 
-	if (atomic_read(&ring->free_count) > ring->thresh)
+	if (mtk_queue_stopped(eth) &&
+	    (atomic_read(&ring->free_count) > ring->thresh))
 		mtk_wake_queue(eth);
 
 	return total;

From 82c6544dddc6c4bd940917af2f987dee6be0fc17 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:08 +0200
Subject: [PATCH 15/22] net: mediatek: remove superfluous queue wake up call

The code checks if the queue should be stopped because we are below the
threshold of free descriptors only to check if it should be started again.
If we do end up in a state where we are at the threshold limit, it makes
more sense to just stop the queue and wait for the next IRQ to trigger the
TX housekeeping again. There is no rush in enqueuing the next packet, it
needs to wait for all the others in the queue to be dispatched first
anyway.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 40e37b158a690f..d1cdc2d7615158 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -783,12 +783,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
 		goto drop;
 
-	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
+	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
 		mtk_stop_queue(eth);
-		if (unlikely(atomic_read(&ring->free_count) >
-			     ring->thresh))
-			mtk_wake_queue(eth);
-	}
+
 	spin_unlock_irqrestore(&eth->page_lock, flags);
 
 	return NETDEV_TX_OK;

From 00a8231388d573195a77f6de63b05c4920589a8e Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Fri, 10 Jun 2016 16:53:05 +0200
Subject: [PATCH 16/22] net: au1000_eth: fix PHY detection

Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541
("phy: Add API for {un}registering an mdio device to a bus.")
broke PHY detection on this driver with a copy-paste bug:
The code is looking 32 times for a PHY at address 0.

Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have
their (autodetected) PHYs at address 31.

Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/au1000_eth.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index e0fb0f1122db42..d8c77e8e25edb1 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -508,13 +508,12 @@ static int au1000_mii_probe(struct net_device *dev)
 	/* find the first (lowest address) PHY
 	 * on the current MAC's MII bus
 	 */
-	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
-		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
-			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
-			if (!aup->phy_search_highest_addr)
-				/* break out with first one found */
-				break;
-		}
+	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
+		phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
+		if (phydev && !aup->phy_search_highest_addr)
+			/* break out with first one found */
+			break;
+	}
 
 	if (aup->phy1_search_mac0) {
 		/* try harder to find a PHY */

From 3ab51f9f08106467e705fbbd7d39447e14a40e98 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 10 Jun 2016 23:34:24 -0700
Subject: [PATCH 17/22] Revert "net: au1000_eth: fix PHY detection"

This reverts commit a2f27217e4e60e663b5b971b0ccb287a9548b04e.

I applied the wrong version of this.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/au1000_eth.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index d8c77e8e25edb1..e0fb0f1122db42 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -508,12 +508,13 @@ static int au1000_mii_probe(struct net_device *dev)
 	/* find the first (lowest address) PHY
 	 * on the current MAC's MII bus
 	 */
-	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
-		phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
-		if (phydev && !aup->phy_search_highest_addr)
-			/* break out with first one found */
-			break;
-	}
+	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
+		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
+			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
+			if (!aup->phy_search_highest_addr)
+				/* break out with first one found */
+				break;
+		}
 
 	if (aup->phy1_search_mac0) {
 		/* try harder to find a PHY */

From 5b22d3747756a3e9bceac1acb76304faa0b38680 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Sat, 11 Jun 2016 00:13:04 +0200
Subject: [PATCH 18/22] net: au1000_eth: fix PHY detection

Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541
("phy: Add API for {un}registering an mdio device to a bus.")
broke PHY detection on this driver with a copy-paste bug:
The code is looking 32 times for a PHY at address 0.

Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have
their (autodetected) PHYs at address 31.

Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/au1000_eth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index e0fb0f1122db42..20760e10211a48 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -509,8 +509,8 @@ static int au1000_mii_probe(struct net_device *dev)
 	 * on the current MAC's MII bus
 	 */
 	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
-		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
-			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
+		if (mdiobus_get_phy(aup->mii_bus, phy_addr)) {
+			phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
 			if (!aup->phy_search_highest_addr)
 				/* break out with first one found */
 				break;

From 600a28b9e5c0bd694212248aabb8bdc655837a63 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 11 Jun 2016 20:40:24 -0700
Subject: [PATCH 19/22] ipconfig: Protect ic_addrservaddr with
 IPCONFIG_DYNAMIC.

>> net/ipv4/ipconfig.c:130:15: warning: 'ic_addrservaddr' defined but not used [-Wunused-variable]
    static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index eccf9fd190c0be..1d71c40eaaf35a 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE;		/* My IP address */
 static __be32 ic_netmask = NONE;	/* Netmask for local subnet */
 __be32 ic_gateway = NONE;	/* Gateway IP address */
 
+#ifdef IPCONFIG_DYNAMIC
 static __be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
+#endif
 
 __be32 ic_servaddr = NONE;	/* Boot server IP address */
 

From d8fafc04da8f46e0299a4ae4aa40cc17b87b5f9f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 12 Jun 2016 16:21:47 -0700
Subject: [PATCH 20/22] net_sched: prio: properly report out of memory errors

At Qdisc creation or change time, prio_tune() creates missing
pfifo qdiscs but does not return an error code if one
qdisc could not be allocated.

Leaving a qdisc in non operational state without telling user
anything about this problem is not good.

Also, testing if we replace something different than noop_qdisc
a second time makes no sense so I removed useless code.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_prio.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 4b0a82191bc401..071718bccdab65 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -202,26 +202,18 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	sch_tree_unlock(sch);
 
 	for (i = 0; i < q->bands; i++) {
-		if (q->queues[i] == &noop_qdisc) {
-			struct Qdisc *child, *old;
-
-			child = qdisc_create_dflt(sch->dev_queue,
-						  &pfifo_qdisc_ops,
-						  TC_H_MAKE(sch->handle, i + 1));
-			if (child) {
-				sch_tree_lock(sch);
-				old = q->queues[i];
-				q->queues[i] = child;
-
-				if (old != &noop_qdisc) {
-					qdisc_tree_reduce_backlog(old,
-								  old->q.qlen,
-								  old->qstats.backlog);
-					qdisc_destroy(old);
-				}
-				sch_tree_unlock(sch);
-			}
-		}
+		struct Qdisc *child;
+
+		if (q->queues[i] != &noop_qdisc)
+			continue;
+
+		child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+					  TC_H_MAKE(sch->handle, i + 1));
+		if (!child)
+			return -ENOMEM;
+		sch_tree_lock(sch);
+		q->queues[i] = child;
+		sch_tree_unlock(sch);
 	}
 	return 0;
 }

From f4bbb2c4e3188a44bf4a45c7ca621586f9223b30 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Sat, 11 Jun 2016 01:11:54 +0300
Subject: [PATCH 21/22] net: ethernet: ti: cpsw: use destroy ctlr to destroy
 channels

There is no reason to destroy channels that are destroyed while
cpdma_ctlr destroy. In this case no need to remember how much
channels where created and destroy them by one, as cpdma_ctlr
destroys all of them.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index e6bb0ecb12c74c..53190894f17a04 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2505,8 +2505,6 @@ static int cpsw_probe(struct platform_device *pdev)
 clean_ale_ret:
 	cpsw_ale_destroy(priv->ale);
 clean_dma_ret:
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 clean_runtime_disable_ret:
 	pm_runtime_disable(&pdev->dev);
@@ -2534,8 +2532,6 @@ static int cpsw_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 
 	cpsw_ale_destroy(priv->ale);
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 	pm_runtime_disable(&pdev->dev);
 	device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);

From bbbbac7ece6614c593686a35c9b22a129153feb1 Mon Sep 17 00:00:00 2001
From: Daniel Metz <Daniel.Metz@rohde-schwarz.com>
Date: Fri, 10 Jun 2016 10:34:52 +0200
Subject: [PATCH 22/22] tcp: use RFC6298 compliant TCP RTO calculation

This patch adjusts Linux RTO calculation to be RFC6298 Standard
compliant. MinRTO is no longer added to the computed RTO, RTO damping
and overestimation are decreased.

In RFC 6298 Standard TCP Retransmission Timeout (RTO) calculation the
calculated RTO is rounded up to the Minimum RTO (MinRTO), if it is
less.  The Linux implementation as a discrepancy to the Standard
basically adds the defined MinRTO to the calculated RTO. When
comparing both approaches, the Linux calculation seems to perform
worse for sender limited TCP flows like Telnet, SSH or constant bit
rate encoded transmissions, especially for Round Trip Times (RTT) of
50ms to 800ms.

Compared to the Linux implementation the RFC 6298 proposed RTO
calculation performs better and more precise in adapting to current
network characteristics. Extensive measurements for bulk data did not
show a negative impact of the adjusted calculation.

Using the RFC 6298 compliant implementation, the tcp_sock struct variable
u32 mdev_max_us becomes obsolete and consequently is being removed.

Exemplary performance comparison for sender-limited-flows:

- Rate: 10Mbit/s
- Delay: 200ms, Delay Variation: 10ms
- Time between each scheduled segment: 1s
- Amount Data Segments: 300
- Mean of 11 runs

         Mean Response Waiting Time [milliseconds]

PER [%] |   0.5      1    1.5      2      3      5      7     10
--------+-------------------------------------------------------
old     | 206.4  208.6  218.0  218.6  227.2  249.3  274.7  308.2
new     | 203.9  206.0  207.0  209.9  217.3  225.6  238.7  259.1

Detailed analysis:

https://docs.google.com/document/d/1pKmPfnQb6fDK4qpiNVkN8cQyGE4wYDZukcuZfR-
BnnM/

Reasoning for historic design:

Sarolahti, P.; Kuznetsov, A. (2002). Congestion Control in Linux TCP.
Conference Paper. Proceedings of the FREENIX Track. 2002 USENIX Annual
https://www.cs.helsinki.fi/research/iwtcp/papers/linuxtcp.pdf

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: Daniel Metz <dmetz@mytum.de>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>

bla
---
 include/linux/tcp.h    |  1 -
 net/ipv4/tcp_input.c   | 74 ++++++++++--------------------------------
 net/ipv4/tcp_metrics.c |  2 +-
 3 files changed, 18 insertions(+), 59 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7be9b124235486..d1790c565d0ad1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -231,7 +231,6 @@ struct tcp_sock {
 /* RTT measurement */
 	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
 	u32	mdev_us;	/* medium deviation			*/
-	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/
 	u32	rttvar_us;	/* smoothed mdev_max			*/
 	u32	rtt_seq;	/* sequence number to update rttvar	*/
 	struct rtt_meas {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6c8f4cd080001..252a1843cdfeb5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -680,8 +680,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 /* Called to compute a smoothed rtt estimate. The data fed to this
  * routine either comes from timestamps, or from segments that were
  * known _not_ to have been retransmitted [see Karn/Partridge
- * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
- * piece by Van Jacobson.
+ * Proceedings SIGCOMM 87].
  * NOTE: the next three routines used to be one big routine.
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
@@ -692,59 +691,21 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 	long m = mrtt_us; /* RTT */
 	u32 srtt = tp->srtt_us;
 
-	/*	The following amusing code comes from Jacobson's
-	 *	article in SIGCOMM '88.  Note that rtt and mdev
-	 *	are scaled versions of rtt and mean deviation.
-	 *	This is designed to be as fast as possible
-	 *	m stands for "measurement".
-	 *
-	 *	On a 1990 paper the rto value is changed to:
-	 *	RTO = rtt + 4 * mdev
-	 *
-	 * Funny. This algorithm seems to be very broken.
-	 * These formulae increase RTO, when it should be decreased, increase
-	 * too slowly, when it should be increased quickly, decrease too quickly
-	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
-	 * does not matter how to _calculate_ it. Seems, it was trap
-	 * that VJ failed to avoid. 8)
-	 */
 	if (srtt != 0) {
-		m -= (srtt >> 3);	/* m is now error in rtt est */
-		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
-		if (m < 0) {
-			m = -m;		/* m is now abs(error) */
-			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
-			/* This is similar to one of Eifel findings.
-			 * Eifel blocks mdev updates when rtt decreases.
-			 * This solution is a bit different: we use finer gain
-			 * for mdev in this case (alpha*beta).
-			 * Like Eifel it also prevents growth of rto,
-			 * but also it limits too fast rto decreases,
-			 * happening in pure Eifel.
-			 */
-			if (m > 0)
-				m >>= 3;
-		} else {
-			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
-		}
-		tp->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
-		if (tp->mdev_us > tp->mdev_max_us) {
-			tp->mdev_max_us = tp->mdev_us;
-			if (tp->mdev_max_us > tp->rttvar_us)
-				tp->rttvar_us = tp->mdev_max_us;
-		}
-		if (after(tp->snd_una, tp->rtt_seq)) {
-			if (tp->mdev_max_us < tp->rttvar_us)
-				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
+		m -= (srtt >> 3);	 /* m' = m - srtt / 8 = (R' - SRTT) */
+		srtt += m;	/* srtt = srtt + m’ = srtt + m - srtt / 8 */
+		if (m < 0)
+			m = -m;
+		m -= (tp->mdev_us >> 2); /* m'' = |m'| - mdev / 4 */
+		tp->mdev_us += m;
+		tp->rttvar_us = tp->mdev_us;
+		if (after(tp->snd_una, tp->rtt_seq))
 			tp->rtt_seq = tp->snd_nxt;
-			tp->mdev_max_us = tcp_rto_min_us(sk);
-		}
 	} else {
 		/* no previous measure. */
-		srtt = m << 3;		/* take the measured time to be rtt */
-		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
-		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
-		tp->mdev_max_us = tp->rttvar_us;
+		srtt = m << 3;
+		tp->mdev_us = m << 1;
+		tp->rttvar_us = tp->mdev_us;
 		tp->rtt_seq = tp->snd_nxt;
 	}
 	tp->srtt_us = max(1U, srtt);
@@ -798,6 +759,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
  */
 static void tcp_set_rto(struct sock *sk)
 {
+	const u32 min_rto = tcp_rto_min_us(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	/* Old crap is replaced with new one. 8)
 	 *
@@ -809,17 +771,15 @@ static void tcp_set_rto(struct sock *sk)
 	 *    is invisible. Actually, Linux-2.4 also generates erratic
 	 *    ACKs in some circumstances.
 	 */
-	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
-
+	if (((tp->srtt_us >> 3) + tp->rttvar_us) < min_rto)
+		inet_csk(sk)->icsk_rto = usecs_to_jiffies(min_rto);
+	else
+		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
 	/* 2. Fixups made earlier cannot be right.
 	 *    If we do not estimate RTO correctly without them,
 	 *    all the algo is pure shit and should be replaced
 	 *    with correct one. It is exactly, which we pretend to do.
 	 */
-
-	/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
-	 * guarantees that rto is higher.
-	 */
 	tcp_bound_rto(sk);
 }
 
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b617826e24770a..7f59f5b979c704 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -561,7 +561,7 @@ void tcp_init_metrics(struct sock *sk)
 		 * retransmission.
 		 */
 		tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
-		tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
+		tp->mdev_us = tp->rttvar_us;
 
 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}