From aa1cf25887099bba68f1f3879c0d394e08b8779f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 27 Oct 2014 09:14:30 +0900 Subject: [PATCH 001/138] ata: sata_rcar: Disable DIPM mode for r8a7790 ES1 Unlike other SATA R-Car r8a7790 controllers the r8a7790 ES1 SATA R-Car controller needs to be run with DIPM disabled. Signed-off-by: Simon Horman Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- Documentation/devicetree/bindings/ata/sata_rcar.txt | 3 ++- drivers/ata/sata_rcar.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt index 1e6111333fa88f..7dd32d321a3433 100644 --- a/Documentation/devicetree/bindings/ata/sata_rcar.txt +++ b/Documentation/devicetree/bindings/ata/sata_rcar.txt @@ -3,7 +3,8 @@ Required properties: - compatible : should contain one of the following: - "renesas,sata-r8a7779" for R-Car H1 - - "renesas,sata-r8a7790" for R-Car H2 + - "renesas,sata-r8a7790-es1" for R-Car H2 ES1 + - "renesas,sata-r8a7790" for R-Car H2 other than ES1 - "renesas,sata-r8a7791" for R-Car M2 - reg : address and length of the SATA registers; - interrupts : must consist of one interrupt specifier. diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 61eb6d77dac7f5..8732e42db3a927 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -146,6 +146,7 @@ enum sata_rcar_type { RCAR_GEN1_SATA, RCAR_GEN2_SATA, + RCAR_R8A7790_ES1_SATA, }; struct sata_rcar_priv { @@ -763,6 +764,9 @@ static void sata_rcar_setup_port(struct ata_host *host) ap->udma_mask = ATA_UDMA6; ap->flags |= ATA_FLAG_SATA; + if (priv->type == RCAR_R8A7790_ES1_SATA) + ap->flags |= ATA_FLAG_NO_DIPM; + ioaddr->cmd_addr = base + SDATA_REG; ioaddr->ctl_addr = base + SSDEVCON_REG; ioaddr->scr_addr = base + SCRSSTS_REG; @@ -792,6 +796,7 @@ static void sata_rcar_init_controller(struct ata_host *host) sata_rcar_gen1_phy_init(priv); break; case RCAR_GEN2_SATA: + case RCAR_R8A7790_ES1_SATA: sata_rcar_gen2_phy_init(priv); break; default: @@ -837,6 +842,10 @@ static struct of_device_id sata_rcar_match[] = { .compatible = "renesas,sata-r8a7790", .data = (void *)RCAR_GEN2_SATA }, + { + .compatible = "renesas,sata-r8a7790-es1", + .data = (void *)RCAR_R8A7790_ES1_SATA + }, { .compatible = "renesas,sata-r8a7791", .data = (void *)RCAR_GEN2_SATA @@ -849,6 +858,7 @@ static const struct platform_device_id sata_rcar_id_table[] = { { "sata_rcar", RCAR_GEN1_SATA }, /* Deprecated by "sata-r8a7779" */ { "sata-r8a7779", RCAR_GEN1_SATA }, { "sata-r8a7790", RCAR_GEN2_SATA }, + { "sata-r8a7790-es1", RCAR_R8A7790_ES1_SATA }, { "sata-r8a7791", RCAR_GEN2_SATA }, { }, }; From 03e83cbd34a4602bf0d750e5ff4bf8b7d0e066b2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 27 Oct 2014 12:00:01 -0400 Subject: [PATCH 002/138] Revert "AHCI: Do not acquire ata_host::lock from single IRQ handler" This reverts commit 33fb0d01ce60fe4c0c12c4f0c134c5cdb818ac5a. 18dcf433f3de ("AHCI: Optimize single IRQ interrupt processing") is scheduled to be reverted. This is an optimization dependent on the mentioned commit. Revert it first. Signed-off-by: Tejun Heo --- drivers/ata/libahci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 5eb61c9e63da95..3ce3d23e4f97cc 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1875,6 +1875,8 @@ static irqreturn_t ahci_single_irq_intr(int irq, void *dev_instance) irq_masked = irq_stat & hpriv->port_map; + spin_lock(&host->lock); + for (i = 0; i < host->n_ports; i++) { struct ata_port *ap; @@ -1906,6 +1908,8 @@ static irqreturn_t ahci_single_irq_intr(int irq, void *dev_instance) */ writel(irq_stat, mmio + HOST_IRQ_STAT); + spin_unlock(&host->lock); + VPRINTK("EXIT\n"); return handled ? IRQ_WAKE_THREAD : IRQ_NONE; From 7865f83fd2f23cbf3cd8ad0ddc2cef796f005aaf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 27 Oct 2014 09:50:36 -0400 Subject: [PATCH 003/138] Revert "AHCI: Optimize single IRQ interrupt processing" This reverts commit 18dcf433f3ded61eb140a55e7048ec2fef79e723. IRQF_ONESHOT was missing from the conversion causing screaming interrupts problems on some setups and LKP detected measureable drop in IO performance. It looks like we'll first need to drop the threaded IRQ handling first before splitting locking. Signed-off-by: Tejun Heo Cc: Alexander Gordeev Reported-by: kernel test robot Reported-by: Marc Zyngier Link: http://lkml.kernel.org/g/20141027021651.GF27038@yliu-dev.sh.intel.com Link: http://lkml.kernel.org/g/1414082970-20775-1-git-send-email-marc.zyngier@arm.com --- drivers/ata/libahci.c | 74 ++++++++----------------------------------- 1 file changed, 13 insertions(+), 61 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 3ce3d23e4f97cc..97683e45ab043b 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1778,16 +1778,15 @@ static void ahci_handle_port_interrupt(struct ata_port *ap, } } -static void ahci_update_intr_status(struct ata_port *ap) +static void ahci_port_intr(struct ata_port *ap) { void __iomem *port_mmio = ahci_port_base(ap); - struct ahci_port_priv *pp = ap->private_data; u32 status; status = readl(port_mmio + PORT_IRQ_STAT); writel(status, port_mmio + PORT_IRQ_STAT); - atomic_or(status, &pp->intr_status); + ahci_handle_port_interrupt(ap, port_mmio, status); } static irqreturn_t ahci_port_thread_fn(int irq, void *dev_instance) @@ -1808,34 +1807,6 @@ static irqreturn_t ahci_port_thread_fn(int irq, void *dev_instance) return IRQ_HANDLED; } -irqreturn_t ahci_thread_fn(int irq, void *dev_instance) -{ - struct ata_host *host = dev_instance; - struct ahci_host_priv *hpriv = host->private_data; - u32 irq_masked = hpriv->port_map; - unsigned int i; - - for (i = 0; i < host->n_ports; i++) { - struct ata_port *ap; - - if (!(irq_masked & (1 << i))) - continue; - - ap = host->ports[i]; - if (ap) { - ahci_port_thread_fn(irq, ap); - VPRINTK("port %u\n", i); - } else { - VPRINTK("port %u (no irq)\n", i); - if (ata_ratelimit()) - dev_warn(host->dev, - "interrupt on disabled port %u\n", i); - } - } - - return IRQ_HANDLED; -} - static irqreturn_t ahci_multi_irqs_intr(int irq, void *dev_instance) { struct ata_port *ap = dev_instance; @@ -1885,7 +1856,7 @@ static irqreturn_t ahci_single_irq_intr(int irq, void *dev_instance) ap = host->ports[i]; if (ap) { - ahci_update_intr_status(ap); + ahci_port_intr(ap); VPRINTK("port %u\n", i); } else { VPRINTK("port %u (no irq)\n", i); @@ -1912,7 +1883,7 @@ static irqreturn_t ahci_single_irq_intr(int irq, void *dev_instance) VPRINTK("EXIT\n"); - return handled ? IRQ_WAKE_THREAD : IRQ_NONE; + return IRQ_RETVAL(handled); } unsigned int ahci_qc_issue(struct ata_queued_cmd *qc) @@ -2324,8 +2295,13 @@ static int ahci_port_start(struct ata_port *ap) */ pp->intr_mask = DEF_PORT_IRQ; - spin_lock_init(&pp->lock); - ap->lock = &pp->lock; + /* + * Switch to per-port locking in case each port has its own MSI vector. + */ + if ((hpriv->flags & AHCI_HFLAG_MULTI_MSI)) { + spin_lock_init(&pp->lock); + ap->lock = &pp->lock; + } ap->private_data = pp; @@ -2486,31 +2462,6 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host, int irq, return rc; } -static int ahci_host_activate_single_irq(struct ata_host *host, int irq, - struct scsi_host_template *sht) -{ - int i, rc; - - rc = ata_host_start(host); - if (rc) - return rc; - - rc = devm_request_threaded_irq(host->dev, irq, ahci_single_irq_intr, - ahci_thread_fn, IRQF_SHARED, - dev_driver_string(host->dev), host); - if (rc) - return rc; - - for (i = 0; i < host->n_ports; i++) - ata_port_desc(host->ports[i], "irq %d", irq); - - rc = ata_host_register(host, sht); - if (rc) - devm_free_irq(host->dev, irq, host); - - return rc; -} - /** * ahci_host_activate - start AHCI host, request IRQs and register it * @host: target ATA host @@ -2536,7 +2487,8 @@ int ahci_host_activate(struct ata_host *host, int irq, if (hpriv->flags & AHCI_HFLAG_MULTI_MSI) rc = ahci_host_activate_multi_irqs(host, irq, sht); else - rc = ahci_host_activate_single_irq(host, irq, sht); + rc = ata_host_activate(host, irq, ahci_single_irq_intr, + IRQF_SHARED, sht); return rc; } EXPORT_SYMBOL_GPL(ahci_host_activate); From 66a7cbc303f4d28f201529b06061944d51ab530c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 27 Oct 2014 10:22:56 -0400 Subject: [PATCH 004/138] ahci: disable MSI instead of NCQ on Samsung pci-e SSDs on macbooks Samsung pci-e SSDs on macbooks failed miserably on NCQ commands, so 67809f85d31e ("ahci: disable NCQ on Samsung pci-e SSDs on macbooks") disabled NCQ on them. It turns out that NCQ is fine as long as MSI is not used, so let's turn off MSI and leave NCQ on. Signed-off-by: Tejun Heo Link: https://bugzilla.kernel.org/show_bug.cgi?id=60731 Tested-by: Tested-by: Imre Kaloz Cc: stable@vger.kernel.org Fixes: 67809f85d31e ("ahci: disable NCQ on Samsung pci-e SSDs on macbooks") --- drivers/ata/ahci.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 5f039f1910677f..ef4b647b3ed2b0 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -60,6 +60,7 @@ enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, board_ahci_ign_iferr, + board_ahci_nomsi, board_ahci_noncq, board_ahci_nosntf, board_ahci_yes_fbs, @@ -121,6 +122,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_nomsi] = { + AHCI_HFLAGS (AHCI_HFLAG_NO_MSI), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_noncq] = { AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ), .flags = AHCI_FLAG_COMMON, @@ -475,10 +483,10 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ /* - * Samsung SSDs found on some macbooks. NCQ times out. - * https://bugzilla.kernel.org/show_bug.cgi?id=60731 + * Samsung SSDs found on some macbooks. NCQ times out if MSI is + * enabled. https://bugzilla.kernel.org/show_bug.cgi?id=60731 */ - { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_noncq }, + { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_nomsi }, /* Enmotus */ { PCI_DEVICE(0x1c44, 0x8000), board_ahci }, From 690000b930456a98663567d35dd5c54b688d1e3f Mon Sep 17 00:00:00 2001 From: James Ralston Date: Mon, 13 Oct 2014 15:16:38 -0700 Subject: [PATCH 005/138] ahci: Add Device IDs for Intel Sunrise Point PCH This patch adds the AHCI-mode SATA Device IDs for the Intel Sunrise Point PCH. Signed-off-by: James Ralston Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index ef4b647b3ed2b0..89c08113411285 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -321,6 +321,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From e35b98849f2530bb77f8fe649b3eaa1489ff9d33 Mon Sep 17 00:00:00 2001 From: Koji Matsuoka Date: Tue, 28 Oct 2014 12:45:32 +0900 Subject: [PATCH 006/138] ata: sata_rcar: Add r8a7793 device support Signed-off-by: Koji Matsuoka Signed-off-by: Yoshihiro Kaneko Signed-off-by: Tejun Heo --- Documentation/devicetree/bindings/ata/sata_rcar.txt | 3 ++- drivers/ata/sata_rcar.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt index 7dd32d321a3433..80ae87a0784bdb 100644 --- a/Documentation/devicetree/bindings/ata/sata_rcar.txt +++ b/Documentation/devicetree/bindings/ata/sata_rcar.txt @@ -5,7 +5,8 @@ Required properties: - "renesas,sata-r8a7779" for R-Car H1 - "renesas,sata-r8a7790-es1" for R-Car H2 ES1 - "renesas,sata-r8a7790" for R-Car H2 other than ES1 - - "renesas,sata-r8a7791" for R-Car M2 + - "renesas,sata-r8a7791" for R-Car M2-W + - "renesas,sata-r8a7793" for R-Car M2-N - reg : address and length of the SATA registers; - interrupts : must consist of one interrupt specifier. diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 8732e42db3a927..ea1fbc1d4c5f11 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -850,6 +850,10 @@ static struct of_device_id sata_rcar_match[] = { .compatible = "renesas,sata-r8a7791", .data = (void *)RCAR_GEN2_SATA }, + { + .compatible = "renesas,sata-r8a7793", + .data = (void *)RCAR_GEN2_SATA + }, { }, }; MODULE_DEVICE_TABLE(of, sata_rcar_match); @@ -860,6 +864,7 @@ static const struct platform_device_id sata_rcar_id_table[] = { { "sata-r8a7790", RCAR_GEN2_SATA }, { "sata-r8a7790-es1", RCAR_R8A7790_ES1_SATA }, { "sata-r8a7791", RCAR_GEN2_SATA }, + { "sata-r8a7793", RCAR_GEN2_SATA }, { }, }; MODULE_DEVICE_TABLE(platform, sata_rcar_id_table); From 7b358f0652dc09069ee19c4cda52fb20a1fa582a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 23 Oct 2014 14:42:33 +0300 Subject: [PATCH 007/138] iwlwifi: mvm: initialize the cur_ucode upon boot mvm->cur_ucode wasn't set before we actually load the firmware. This caused issues when we boot in RFKILL since we get an RFKILL interrupt upon boot even before we load any firmware. This leads to issues since iwl_mvm_set_hw_rfkill_state (the RFKILL interrupts handler in mvm) relies on this variable. Fix this. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 48cb25a93591a4..27fb0a1ef274b6 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -424,6 +424,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, } mvm->sf_state = SF_UNINIT; mvm->low_latency_agg_frame_limit = 6; + mvm->cur_ucode = IWL_UCODE_INIT; mutex_init(&mvm->mutex); mutex_init(&mvm->d0i3_suspend_mutex); From 805dbe17d1c832ad341f14fae8cedf41b67ca6fa Mon Sep 17 00:00:00 2001 From: Junjie Mao Date: Tue, 28 Oct 2014 09:31:47 +0800 Subject: [PATCH 008/138] mac80211_hwsim: release driver when ieee80211_register_hw fails The driver is not released when ieee80211_register_hw fails in mac80211_hwsim_create_radio, leading to the access to the unregistered (and possibly freed) device in platform_driver_unregister: [ 0.447547] mac80211_hwsim: ieee80211_register_hw failed (-2) [ 0.448292] ------------[ cut here ]------------ [ 0.448854] WARNING: CPU: 0 PID: 1 at ../include/linux/kref.h:47 kobject_get+0x33/0x50() [ 0.449839] CPU: 0 PID: 1 Comm: swapper Not tainted 3.17.0-00001-gdd46990-dirty #2 [ 0.450813] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 0.451512] 00000000 00000000 78025e38 7967c6c6 78025e68 7905e09b 7988b480 00000000 [ 0.452579] 00000001 79887d62 0000002f 79170bb3 79170bb3 78397008 79ac9d74 00000001 [ 0.453614] 78025e78 7905e15d 00000009 00000000 78025e84 79170bb3 78397000 78025e8c [ 0.454632] Call Trace: [ 0.454921] [<7967c6c6>] dump_stack+0x16/0x18 [ 0.455453] [<7905e09b>] warn_slowpath_common+0x6b/0x90 [ 0.456067] [<79170bb3>] ? kobject_get+0x33/0x50 [ 0.456612] [<79170bb3>] ? kobject_get+0x33/0x50 [ 0.457155] [<7905e15d>] warn_slowpath_null+0x1d/0x20 [ 0.457748] [<79170bb3>] kobject_get+0x33/0x50 [ 0.458274] [<7925824f>] get_device+0xf/0x20 [ 0.458779] [<7925b5cd>] driver_detach+0x3d/0xa0 [ 0.459331] [<7925a3ff>] bus_remove_driver+0x8f/0xb0 [ 0.459927] [<7925bf80>] ? class_unregister+0x40/0x80 [ 0.460660] [<7925bad7>] driver_unregister+0x47/0x50 [ 0.461248] [<7925c033>] ? class_destroy+0x13/0x20 [ 0.461824] [<7925d07b>] platform_driver_unregister+0xb/0x10 [ 0.462507] [<79b51ba0>] init_mac80211_hwsim+0x3e8/0x3f9 [ 0.463161] [<79b30c58>] do_one_initcall+0x106/0x1a9 [ 0.463758] [<79b517b8>] ? if_spi_init_module+0xac/0xac [ 0.464393] [<79b517b8>] ? if_spi_init_module+0xac/0xac [ 0.465001] [<79071935>] ? parse_args+0x2f5/0x480 [ 0.465569] [<7906b41e>] ? __usermodehelper_set_disable_depth+0x3e/0x50 [ 0.466345] [<79b30dd9>] kernel_init_freeable+0xde/0x17d [ 0.466972] [<79b304d6>] ? do_early_param+0x7a/0x7a [ 0.467546] [<79677b1b>] kernel_init+0xb/0xe0 [ 0.468072] [<79075f42>] ? schedule_tail+0x12/0x40 [ 0.468658] [<79686580>] ret_from_kernel_thread+0x20/0x30 [ 0.469303] [<79677b10>] ? rest_init+0xc0/0xc0 [ 0.469829] ---[ end trace ad8ac403ff8aef5c ]--- [ 0.470509] ------------[ cut here ]------------ [ 0.471047] WARNING: CPU: 0 PID: 1 at ../kernel/locking/lockdep.c:3161 __lock_acquire.isra.22+0x7aa/0xb00() [ 0.472163] DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS) [ 0.472774] CPU: 0 PID: 1 Comm: swapper Tainted: G W 3.17.0-00001-gdd46990-dirty #2 [ 0.473815] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 0.474492] 78025de0 78025de0 78025da0 7967c6c6 78025dd0 7905e09b 79888931 78025dfc [ 0.475515] 00000001 79888a93 00000c59 7907f33a 7907f33a 78028000 fffe9d09 00000000 [ 0.476519] 78025de8 7905e10e 00000009 78025de0 79888931 78025dfc 78025e24 7907f33a [ 0.477523] Call Trace: [ 0.477821] [<7967c6c6>] dump_stack+0x16/0x18 [ 0.478352] [<7905e09b>] warn_slowpath_common+0x6b/0x90 [ 0.478976] [<7907f33a>] ? __lock_acquire.isra.22+0x7aa/0xb00 [ 0.479658] [<7907f33a>] ? __lock_acquire.isra.22+0x7aa/0xb00 [ 0.480417] [<7905e10e>] warn_slowpath_fmt+0x2e/0x30 [ 0.480479] [<7907f33a>] __lock_acquire.isra.22+0x7aa/0xb00 [ 0.480479] [<79078aa5>] ? sched_clock_cpu+0xb5/0xf0 [ 0.480479] [<7907fd06>] lock_acquire+0x56/0x70 [ 0.480479] [<7925b5e8>] ? driver_detach+0x58/0xa0 [ 0.480479] [<79682d11>] mutex_lock_nested+0x61/0x2a0 [ 0.480479] [<7925b5e8>] ? driver_detach+0x58/0xa0 [ 0.480479] [<7925b5e8>] ? driver_detach+0x58/0xa0 [ 0.480479] [<7925b5e8>] driver_detach+0x58/0xa0 [ 0.480479] [<7925a3ff>] bus_remove_driver+0x8f/0xb0 [ 0.480479] [<7925bf80>] ? class_unregister+0x40/0x80 [ 0.480479] [<7925bad7>] driver_unregister+0x47/0x50 [ 0.480479] [<7925c033>] ? class_destroy+0x13/0x20 [ 0.480479] [<7925d07b>] platform_driver_unregister+0xb/0x10 [ 0.480479] [<79b51ba0>] init_mac80211_hwsim+0x3e8/0x3f9 [ 0.480479] [<79b30c58>] do_one_initcall+0x106/0x1a9 [ 0.480479] [<79b517b8>] ? if_spi_init_module+0xac/0xac [ 0.480479] [<79b517b8>] ? if_spi_init_module+0xac/0xac [ 0.480479] [<79071935>] ? parse_args+0x2f5/0x480 [ 0.480479] [<7906b41e>] ? __usermodehelper_set_disable_depth+0x3e/0x50 [ 0.480479] [<79b30dd9>] kernel_init_freeable+0xde/0x17d [ 0.480479] [<79b304d6>] ? do_early_param+0x7a/0x7a [ 0.480479] [<79677b1b>] kernel_init+0xb/0xe0 [ 0.480479] [<79075f42>] ? schedule_tail+0x12/0x40 [ 0.480479] [<79686580>] ret_from_kernel_thread+0x20/0x30 [ 0.480479] [<79677b10>] ? rest_init+0xc0/0xc0 [ 0.480479] ---[ end trace ad8ac403ff8aef5d ]--- [ 0.495478] BUG: unable to handle kernel paging request at 00200200 [ 0.496257] IP: [<79682de5>] mutex_lock_nested+0x135/0x2a0 [ 0.496923] *pde = 00000000 [ 0.497290] Oops: 0002 [#1] [ 0.497653] CPU: 0 PID: 1 Comm: swapper Tainted: G W 3.17.0-00001-gdd46990-dirty #2 [ 0.498659] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 0.499321] task: 78028000 ti: 78024000 task.ti: 78024000 [ 0.499955] EIP: 0060:[<79682de5>] EFLAGS: 00010097 CPU: 0 [ 0.500620] EIP is at mutex_lock_nested+0x135/0x2a0 [ 0.501145] EAX: 00200200 EBX: 78397434 ECX: 78397460 EDX: 78025e70 [ 0.501816] ESI: 00000246 EDI: 78028000 EBP: 78025e8c ESP: 78025e54 [ 0.502497] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 [ 0.503076] CR0: 8005003b CR2: 00200200 CR3: 01b9d000 CR4: 00000690 [ 0.503773] Stack: [ 0.503998] 00000000 00000001 00000000 7925b5e8 78397460 7925b5e8 78397474 78397460 [ 0.504944] 00200200 11111111 78025e70 78397000 79ac9d74 00000001 78025ea0 7925b5e8 [ 0.505451] 79ac9d74 fffffffe 00000001 78025ebc 7925a3ff 7a251398 78025ec8 7925bf80 [ 0.505451] Call Trace: [ 0.505451] [<7925b5e8>] ? driver_detach+0x58/0xa0 [ 0.505451] [<7925b5e8>] ? driver_detach+0x58/0xa0 [ 0.505451] [<7925b5e8>] driver_detach+0x58/0xa0 [ 0.505451] [<7925a3ff>] bus_remove_driver+0x8f/0xb0 [ 0.505451] [<7925bf80>] ? class_unregister+0x40/0x80 [ 0.505451] [<7925bad7>] driver_unregister+0x47/0x50 [ 0.505451] [<7925c033>] ? class_destroy+0x13/0x20 [ 0.505451] [<7925d07b>] platform_driver_unregister+0xb/0x10 [ 0.505451] [<79b51ba0>] init_mac80211_hwsim+0x3e8/0x3f9 [ 0.505451] [<79b30c58>] do_one_initcall+0x106/0x1a9 [ 0.505451] [<79b517b8>] ? if_spi_init_module+0xac/0xac [ 0.505451] [<79b517b8>] ? if_spi_init_module+0xac/0xac [ 0.505451] [<79071935>] ? parse_args+0x2f5/0x480 [ 0.505451] [<7906b41e>] ? __usermodehelper_set_disable_depth+0x3e/0x50 [ 0.505451] [<79b30dd9>] kernel_init_freeable+0xde/0x17d [ 0.505451] [<79b304d6>] ? do_early_param+0x7a/0x7a [ 0.505451] [<79677b1b>] kernel_init+0xb/0xe0 [ 0.505451] [<79075f42>] ? schedule_tail+0x12/0x40 [ 0.505451] [<79686580>] ret_from_kernel_thread+0x20/0x30 [ 0.505451] [<79677b10>] ? rest_init+0xc0/0xc0 [ 0.505451] Code: 89 d8 e8 cf 9b 9f ff 8b 4f 04 8d 55 e4 89 d8 e8 72 9d 9f ff 8d 43 2c 89 c1 89 45 d8 8b 43 30 8d 55 e4 89 53 30 89 4d e4 89 45 e8 <89> 10 8b 55 dc 8b 45 e0 89 7d ec e8 db af 9f ff eb 11 90 31 c0 [ 0.505451] EIP: [<79682de5>] mutex_lock_nested+0x135/0x2a0 SS:ESP 0068:78025e54 [ 0.505451] CR2: 0000000000200200 [ 0.505451] ---[ end trace ad8ac403ff8aef5e ]--- [ 0.505451] Kernel panic - not syncing: Fatal exception Fixes: 9ea927748ced ("mac80211_hwsim: Register and bind to driver") Reported-by: Fengguang Wu Signed-off-by: Junjie Mao Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index babbdc1ce741c6..c9ad4cf1adfb4b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1987,7 +1987,7 @@ static int mac80211_hwsim_create_radio(int channels, const char *reg_alpha2, if (err != 0) { printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n", err); - goto failed_hw; + goto failed_bind; } skb_queue_head_init(&data->pending); @@ -2183,6 +2183,8 @@ static int mac80211_hwsim_create_radio(int channels, const char *reg_alpha2, return idx; failed_hw: + device_release_driver(data->dev); +failed_bind: device_unregister(data->dev); failed_drvdata: ieee80211_free_hw(hw); From 10b68487869031828aede7313c2befc53d6d30ec Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 27 Oct 2014 11:56:06 +0100 Subject: [PATCH 009/138] mac80211: flush keys for AP mode on ieee80211_do_stop Userspace can add keys to an AP mode interface before start_ap has been called. If there have been no calls to start_ap/stop_ap in the mean time, the keys will still be around when the interface is brought down. Signed-off-by: Felix Fietkau [adjust comments, fix AP_VLAN case] Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index af237223a8cd9b..3b9e2b7b3f307c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -898,6 +898,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, list_del(&sdata->u.vlan.list); mutex_unlock(&local->mtx); RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL); + /* see comment in the default case below */ + ieee80211_free_keys(sdata, true); /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: @@ -923,17 +925,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* * When we get here, the interface is marked down. * Free the remaining keys, if there are any - * (shouldn't be, except maybe in WDS mode?) + * (which can happen in AP mode if userspace sets + * keys before the interface is operating, and maybe + * also in WDS mode) * * Force the key freeing to always synchronize_net() * to wait for the RX path in case it is using this - * interface enqueuing frames * at this very time on + * interface enqueuing frames at this very time on * another CPU. */ ieee80211_free_keys(sdata, true); - - /* fall through */ - case NL80211_IFTYPE_AP: skb_queue_purge(&sdata->skb_queue); } From 84469a45a1bedec9918e94ab2f78c5dc0739e4a7 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 28 Oct 2014 13:33:04 +0200 Subject: [PATCH 010/138] mac80211: use secondary channel offset IE also beacons during CSA If we are switching from an HT40+ to an HT40- channel (or vice-versa), we need the secondary channel offset IE to specify what is the post-CSA offset to be used. This applies both to beacons and to probe responses. In ieee80211_parse_ch_switch_ie() we were ignoring this IE from beacons and using the *current* HT information IE instead. This was causing us to use the same offset as before the switch. Fix that by using the secondary channel offset IE also for beacons and don't ever use the pre-switch offset. Additionally, remove the "beacon" argument from ieee80211_parse_ch_switch_ie(), since it's not needed anymore. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 2 +- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/mesh.c | 2 +- net/mac80211/mlme.c | 2 +- net/mac80211/spectmgmt.c | 18 ++++++------------ 5 files changed, 10 insertions(+), 17 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 56b53571c8077a..509bc157ce5551 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -805,7 +805,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, memset(¶ms, 0, sizeof(params)); memset(&csa_ie, 0, sizeof(csa_ie)); - err = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, + err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, sta_flags, ifibss->bssid, &csa_ie); /* can't switch to destination channel, fail */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c2aaec4dfcf0ea..8c68da30595df7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1642,7 +1642,6 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * ieee80211_parse_ch_switch_ie - parses channel switch IEs * @sdata: the sdata of the interface which has received the frame * @elems: parsed 802.11 elements received with the frame - * @beacon: indicates if the frame was a beacon or probe response * @current_band: indicates the current band * @sta_flags: contains information about own capabilities and restrictions * to decide which channel switch announcements can be accepted. Only the @@ -1656,7 +1655,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * Return: 0 on success, <0 on error and >0 if there is nothing to parse. */ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, - struct ieee802_11_elems *elems, bool beacon, + struct ieee802_11_elems *elems, enum ieee80211_band current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index e9f99c1e3fad59..0c8b2a77d312d5 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -874,7 +874,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, memset(¶ms, 0, sizeof(params)); memset(&csa_ie, 0, sizeof(csa_ie)); - err = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, band, + err = ieee80211_parse_ch_switch_ie(sdata, elems, band, sta_flags, sdata->vif.addr, &csa_ie); if (err < 0) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2de88704278b85..08f51c6d0953a4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1072,7 +1072,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, current_band = cbss->channel->band; memset(&csa_ie, 0, sizeof(csa_ie)); - res = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, current_band, + res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band, ifmgd->flags, ifmgd->associated->bssid, &csa_ie); if (res < 0) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 6ab00907008461..efeba56c913bae 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -22,7 +22,7 @@ #include "wme.h" int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, - struct ieee802_11_elems *elems, bool beacon, + struct ieee802_11_elems *elems, enum ieee80211_band current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) @@ -91,19 +91,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - if (!beacon && sec_chan_offs) { + if (sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; - } else if (beacon && ht_oper) { - secondary_channel_offset = - ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; } else if (!(sta_flags & IEEE80211_STA_DISABLE_HT)) { - /* If it's not a beacon, HT is enabled and the IE not present, - * it's 20 MHz, 802.11-2012 8.5.2.6: - * This element [the Secondary Channel Offset Element] is - * present when switching to a 40 MHz channel. It may be - * present when switching to a 20 MHz channel (in which - * case the secondary channel offset is set to SCN). - */ + /* If the secondary channel offset IE is not present, + * we can't know what's the post-CSA offset, so the + * best we can do is use 20MHz. + */ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; } From ff1e417c7c239b7abfe70aa90460a77eaafc7f83 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 28 Oct 2014 13:33:05 +0200 Subject: [PATCH 011/138] mac80211: schedule the actual switch of the station before CSA count 0 Due to the time it takes to process the beacon that started the CSA process, we may be late for the switch if we try to reach exactly beacon 0. To avoid that, use count - 1 when calculating the switch time. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 08f51c6d0953a4..93af0f1c9d991a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1168,7 +1168,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work); else mod_timer(&ifmgd->chswitch_timer, - TU_TO_EXP_TIME(csa_ie.count * cbss->beacon_interval)); + TU_TO_EXP_TIME((csa_ie.count - 1) * + cbss->beacon_interval)); } static bool From 222584647f2821c4d68421c1d07c89b375d501b2 Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Mon, 27 Oct 2014 13:55:45 +0800 Subject: [PATCH 012/138] ACPI / blacklist: blacklist Win8 OSI for Dell Vostro 3546 The wireless hotkey of Dell Vostro 3546 does not work with Win8 OSI. Due to insufficient documentation for the driver implementation, blacklist it as a workaround. Signed-off-by: Adam Lee Signed-off-by: Rafael J. Wysocki --- drivers/acpi/blacklist.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index ed122e17636e32..7556e7c4a055cd 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -290,6 +290,14 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3446"), }, }, + { + .callback = dmi_disable_osi_win8, + .ident = "Dell Vostro 3546", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3546"), + }, + }, /* * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. From 46238845bd609a5c0fbe076e1b82b4c5b33360b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Oct 2014 20:56:42 +0200 Subject: [PATCH 013/138] mac80211: properly flush delayed scan work on interface removal When an interface is deleted, an ongoing hardware scan is canceled and the driver must abort the scan, at the very least reporting completion while the interface is removed. However, if it scheduled the work that might only run after everything is said and done, which leads to cfg80211 warning that the scan isn't reported as finished yet; this is no fault of the driver, it already did, but mac80211 hasn't processed it. To fix this situation, flush the delayed work when the interface being removed is the one that was executing the scan. Cc: stable@vger.kernel.org Reported-by: Sujith Manoharan Tested-by: Sujith Manoharan Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3b9e2b7b3f307c..653f5eb07a27f4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -766,10 +766,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, int i, flushed; struct ps_data *ps; struct cfg80211_chan_def chandef; + bool cancel_scan; clear_bit(SDATA_STATE_RUNNING, &sdata->state); - if (rcu_access_pointer(local->scan_sdata) == sdata) + cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata; + if (cancel_scan) ieee80211_scan_cancel(local); /* @@ -992,6 +994,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps(local, -1); + if (cancel_scan) + flush_delayed_work(&local->scan_work); + if (local->open_count == 0) { ieee80211_stop_device(local); From ece9c72accdc45c3a9484dacb1125ce572647288 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Oct 2014 20:43:38 +0100 Subject: [PATCH 014/138] block: Fix computation of merged request priority Priority of a merged request is computed by ioprio_best(). If one of the requests has undefined priority (IOPRIO_CLASS_NONE) and another request has priority from IOPRIO_CLASS_BE, the function will return the undefined priority which is wrong. Fix the function to properly return priority of a request with the defined priority. Fixes: d58cdfb89ce0c6bd5f81ae931a984ef298dbda20 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/ioprio.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/block/ioprio.c b/block/ioprio.c index e50170ca7c33f4..31666c92b46af2 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -157,14 +157,16 @@ static int get_task_ioprio(struct task_struct *p) int ioprio_best(unsigned short aprio, unsigned short bprio) { - unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); - unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); + unsigned short aclass; + unsigned short bclass; - if (aclass == IOPRIO_CLASS_NONE) - aclass = IOPRIO_CLASS_BE; - if (bclass == IOPRIO_CLASS_NONE) - bclass = IOPRIO_CLASS_BE; + if (!ioprio_valid(aprio)) + aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + if (!ioprio_valid(bprio)) + bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + aclass = IOPRIO_PRIO_CLASS(aprio); + bclass = IOPRIO_PRIO_CLASS(bprio); if (aclass == bclass) return min(aprio, bprio); if (aclass > bclass) From ec1f1276022e4e3ca40871810217d513e39ff250 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 31 Oct 2014 13:43:06 -0400 Subject: [PATCH 015/138] sunhme: Add DMA mapping error checks. Reported-by: Meelis Roos Tested-by: Meelis Roos Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunhme.c | 62 ++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 72c8525d5457b4..9c014803b03b23 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1262,6 +1262,7 @@ static void happy_meal_init_rings(struct happy_meal *hp) HMD(("init rxring, ")); for (i = 0; i < RX_RING_SIZE; i++) { struct sk_buff *skb; + u32 mapping; skb = happy_meal_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); if (!skb) { @@ -1272,10 +1273,16 @@ static void happy_meal_init_rings(struct happy_meal *hp) /* Because we reserve afterwards. */ skb_put(skb, (ETH_FRAME_LEN + RX_OFFSET + 4)); + mapping = dma_map_single(hp->dma_dev, skb->data, RX_BUF_ALLOC_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(hp->dma_dev, mapping)) { + dev_kfree_skb_any(skb); + hme_write_rxd(hp, &hb->happy_meal_rxd[i], 0, 0); + continue; + } hme_write_rxd(hp, &hb->happy_meal_rxd[i], (RXFLAG_OWN | ((RX_BUF_ALLOC_SIZE - RX_OFFSET) << 16)), - dma_map_single(hp->dma_dev, skb->data, RX_BUF_ALLOC_SIZE, - DMA_FROM_DEVICE)); + mapping); skb_reserve(skb, RX_OFFSET); } @@ -2020,6 +2027,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) skb = hp->rx_skbs[elem]; if (len > RX_COPY_THRESHOLD) { struct sk_buff *new_skb; + u32 mapping; /* Now refill the entry, if we can. */ new_skb = happy_meal_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); @@ -2027,13 +2035,21 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) drops++; goto drop_it; } + skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET + 4)); + mapping = dma_map_single(hp->dma_dev, new_skb->data, + RX_BUF_ALLOC_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(hp->dma_dev, mapping))) { + dev_kfree_skb_any(new_skb); + drops++; + goto drop_it; + } + dma_unmap_single(hp->dma_dev, dma_addr, RX_BUF_ALLOC_SIZE, DMA_FROM_DEVICE); hp->rx_skbs[elem] = new_skb; - skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET + 4)); hme_write_rxd(hp, this, (RXFLAG_OWN|((RX_BUF_ALLOC_SIZE-RX_OFFSET)<<16)), - dma_map_single(hp->dma_dev, new_skb->data, RX_BUF_ALLOC_SIZE, - DMA_FROM_DEVICE)); + mapping); skb_reserve(new_skb, RX_OFFSET); /* Trim the original skb for the netif. */ @@ -2248,6 +2264,25 @@ static void happy_meal_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } +static void unmap_partial_tx_skb(struct happy_meal *hp, u32 first_mapping, + u32 first_len, u32 first_entry, u32 entry) +{ + struct happy_meal_txd *txbase = &hp->happy_block->happy_meal_txd[0]; + + dma_unmap_single(hp->dma_dev, first_mapping, first_len, DMA_TO_DEVICE); + + first_entry = NEXT_TX(first_entry); + while (first_entry != entry) { + struct happy_meal_txd *this = &txbase[first_entry]; + u32 addr, len; + + addr = hme_read_desc32(hp, &this->tx_addr); + len = hme_read_desc32(hp, &this->tx_flags); + len &= TXFLAG_SIZE; + dma_unmap_page(hp->dma_dev, addr, len, DMA_TO_DEVICE); + } +} + static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -2284,6 +2319,8 @@ static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, len = skb->len; mapping = dma_map_single(hp->dma_dev, skb->data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(hp->dma_dev, mapping))) + goto out_dma_error; tx_flags |= (TXFLAG_SOP | TXFLAG_EOP); hme_write_txd(hp, &hp->happy_block->happy_meal_txd[entry], (tx_flags | (len & TXFLAG_SIZE)), @@ -2299,6 +2336,8 @@ static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, first_len = skb_headlen(skb); first_mapping = dma_map_single(hp->dma_dev, skb->data, first_len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(hp->dma_dev, first_mapping))) + goto out_dma_error; entry = NEXT_TX(entry); for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { @@ -2308,6 +2347,11 @@ static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, len = skb_frag_size(this_frag); mapping = skb_frag_dma_map(hp->dma_dev, this_frag, 0, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(hp->dma_dev, mapping))) { + unmap_partial_tx_skb(hp, first_mapping, first_len, + first_entry, entry); + goto out_dma_error; + } this_txflags = tx_flags; if (frag == skb_shinfo(skb)->nr_frags - 1) this_txflags |= TXFLAG_EOP; @@ -2333,6 +2377,14 @@ static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, tx_add_log(hp, TXLOG_ACTION_TXMIT, 0); return NETDEV_TX_OK; + +out_dma_error: + hp->tx_skbs[hp->tx_new] = NULL; + spin_unlock_irq(&hp->happy_lock); + + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; } static struct net_device_stats *happy_meal_get_stats(struct net_device *dev) From 437374735c0055433f8300ba59f8cf7214c58ad3 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sat, 1 Nov 2014 16:59:34 +0530 Subject: [PATCH 016/138] net: mvpp2: fix possible memory leak we are allocating memory using kzalloc for struct mvpp2_prs_entry, but later when we are getting error we were just returning the error value without releasing the memory. Signed-off-by: Sudip Mukherjee Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index ece83f101526de..fdf3e382e46493 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -1692,6 +1692,7 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, { struct mvpp2_prs_entry *pe; int tid_aux, tid; + int ret = 0; pe = mvpp2_prs_vlan_find(priv, tpid, ai); @@ -1723,8 +1724,10 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, break; } - if (tid <= tid_aux) - return -EINVAL; + if (tid <= tid_aux) { + ret = -EINVAL; + goto error; + } memset(pe, 0 , sizeof(struct mvpp2_prs_entry)); mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN); @@ -1756,9 +1759,10 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, mvpp2_prs_hw_write(priv, pe); +error: kfree(pe); - return 0; + return ret; } /* Get first free double vlan ai number */ @@ -1821,7 +1825,7 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, unsigned int port_map) { struct mvpp2_prs_entry *pe; - int tid_aux, tid, ai; + int tid_aux, tid, ai, ret = 0; pe = mvpp2_prs_double_vlan_find(priv, tpid1, tpid2); @@ -1838,8 +1842,10 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, /* Set ai value for new double vlan entry */ ai = mvpp2_prs_double_vlan_ai_free_get(priv); - if (ai < 0) - return ai; + if (ai < 0) { + ret = ai; + goto error; + } /* Get first single/triple vlan tid */ for (tid_aux = MVPP2_PE_FIRST_FREE_TID; @@ -1859,8 +1865,10 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, break; } - if (tid >= tid_aux) - return -ERANGE; + if (tid >= tid_aux) { + ret = -ERANGE; + goto error; + } memset(pe, 0, sizeof(struct mvpp2_prs_entry)); mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN); @@ -1887,8 +1895,9 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, mvpp2_prs_tcam_port_map_set(pe, port_map); mvpp2_prs_hw_write(priv, pe); +error: kfree(pe); - return 0; + return ret; } /* IPv4 header parsing for fragmentation and L4 offset */ From 3e8fc38c21d6908b394508a8c9bb220935fed4d2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 31 Oct 2014 15:51:34 -0700 Subject: [PATCH 017/138] net: systemport: fix DMA allocation/freeing sizes We should not be allocating a single byte of DMA coherent memory, but instead a full-sized struct dma_desc (8 bytes). Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 3a6778a667f455..c81bf74685c076 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1110,7 +1110,8 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, /* We just need one DMA descriptor which is DMA-able, since writing to * the port will allocate a new descriptor in its internal linked-list */ - p = dma_zalloc_coherent(kdev, 1, &ring->desc_dma, GFP_KERNEL); + p = dma_zalloc_coherent(kdev, sizeof(struct dma_desc), &ring->desc_dma, + GFP_KERNEL); if (!p) { netif_err(priv, hw, priv->netdev, "DMA alloc failed\n"); return -ENOMEM; @@ -1183,7 +1184,8 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv, ring->cbs = NULL; if (ring->desc_dma) { - dma_free_coherent(kdev, 1, ring->desc_cpu, ring->desc_dma); + dma_free_coherent(kdev, sizeof(struct dma_desc), + ring->desc_cpu, ring->desc_dma); ring->desc_dma = 0; } ring->size = 0; From 914adb55afbc7e449512f609de19e548d1054480 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 31 Oct 2014 15:51:35 -0700 Subject: [PATCH 018/138] net: systemport: do not crash freeing an unitialized TX ring Callers of bcm_sysport_init_tx_ring() can currently fail, and will always call bcm_sysport_fini_tx_ring() in a loop ending at the number of TX queues (32) without checking if the TX ring was successfully initialized or not. Update bcm_sysport_fini_tx_ring() to return early and avoid a crash de-referencing ring->cbs if the TX ring was not initialized, since ring->cbs is the last part of the initialization done by bcm_sysport_init_tx_ring() that could fail. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Reported-by: Maxime Bizon Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index c81bf74685c076..531bb7c57531b1 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1175,6 +1175,13 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv, if (!(reg & TDMA_DISABLED)) netdev_warn(priv->netdev, "TDMA not stopped!\n"); + /* ring->cbs is the last part in bcm_sysport_init_tx_ring which could + * fail, so by checking this pointer we know whether the TX ring was + * fully initialized or not. + */ + if (!ring->cbs) + return; + napi_disable(&ring->napi); netif_napi_del(&ring->napi); From 1db3ddff1602edf2390b7667dcbaa0f71512e3ea Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Sat, 1 Nov 2014 11:08:08 +0800 Subject: [PATCH 019/138] drivers: net: ethernet: xilinx: xilinx_emaclite: Compatible with 'xlnx, xps-ethernetlite-2.00.b' for QEMU using When use current latest upstream qemu (current version: 2.1.2), need let driver compatible with 'xlnx,xps-ethernetlite-2.00.b', or can not find net device in microblaze qemu. Related QEMU commands under fedora 20: yum install libvirt yum install tunctl tunctl -b ip link set tap0 up brctl addif virbr0 tap0 ./microblaze-softmmu/qemu-system-microblaze -M petalogix-s3adsp1800 \ -kernel ../linux-stable.microblaze/arch/microblaze/boot/linux.bin \ -no-reboot -append "console=ttyUL0,115200 doreboot" -nographic \ -net nic,vlan=0,model=xlnx.xps-ethernetlite,macaddr=00:16:35:AF:94:00 \ -net tap,vlan=0,ifname=tap0,script=no,downscript=no in microblaze qemu bash (guest machine): ifconfig eth0 add 192.168.122.2 netmask 255.255.255.0 ifconfig eth0 up After add this patch, can find the device, and can be used by 'telnetd' (need cross-build busybox with glibc for it), then outside can telnet to it without password. Signed-off-by: Chen Gang Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 28dbbdc393ebf2..298fad395d3a42 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1236,6 +1236,7 @@ static struct of_device_id xemaclite_of_match[] = { { .compatible = "xlnx,opb-ethernetlite-1.01.b", }, { .compatible = "xlnx,xps-ethernetlite-1.00.a", }, { .compatible = "xlnx,xps-ethernetlite-2.00.a", }, + { .compatible = "xlnx,xps-ethernetlite-2.00.b", }, { .compatible = "xlnx,xps-ethernetlite-2.01.a", }, { .compatible = "xlnx,xps-ethernetlite-3.00.a", }, { /* end of list */ }, From d52fdbb735c36a209f36a628d40ca9185b349ba7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 31 Oct 2014 21:32:06 +0100 Subject: [PATCH 020/138] smc91x: retrieve IRQ and trigger flags in a modern way The SMC91x is written to explicitly look up the IRQ resource from the platform device and extract the IRQ and flags, however the platform_get_irq() does additional things, like call of_irq_get() in the device tree case, which will translate the IRQ using the irqdomain and defer the probe if the IRQ host cannot be found. As we're not looking up the resource, this will not retrieve the IRQ flags, but that is better done using irqd_get_trigger_type(), as the trigger is what the driver wants to modify. We take care to preserve the semantics that will make the trigger type provided from the resource override any local specifier. Tested on the Nomadik NHK15 which has its SMC91x IRQ line connected to a STMPE2401 GPIO expander on I2C. Signed-off-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 2c62208077fe7a..6cc3cf6f17c846 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2243,9 +2243,10 @@ static int smc_drv_probe(struct platform_device *pdev) const struct of_device_id *match = NULL; struct smc_local *lp; struct net_device *ndev; - struct resource *res, *ires; + struct resource *res; unsigned int __iomem *addr; unsigned long irq_flags = SMC_IRQ_FLAGS; + unsigned long irq_resflags; int ret; ndev = alloc_etherdev(sizeof(struct smc_local)); @@ -2337,16 +2338,19 @@ static int smc_drv_probe(struct platform_device *pdev) goto out_free_netdev; } - ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!ires) { + ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq <= 0) { ret = -ENODEV; goto out_release_io; } - - ndev->irq = ires->start; - - if (irq_flags == -1 || ires->flags & IRQF_TRIGGER_MASK) - irq_flags = ires->flags & IRQF_TRIGGER_MASK; + /* + * If this platform does not specify any special irqflags, or if + * the resource supplies a trigger, override the irqflags with + * the trigger flags from the resource. + */ + irq_resflags = irqd_get_trigger_type(irq_get_irq_data(ndev->irq)); + if (irq_flags == -1 || irq_resflags & IRQF_TRIGGER_MASK) + irq_flags = irq_resflags & IRQF_TRIGGER_MASK; ret = smc_request_attrib(pdev, ndev); if (ret) From fa51ee1085d6f2fa344d4ba64faadc9c6db0a3f1 Mon Sep 17 00:00:00 2001 From: Adel Gadllah Date: Fri, 31 Oct 2014 18:11:25 +0100 Subject: [PATCH 021/138] HID: usbhid: enable always-poll quirk for Elan Touchscreen 0103 Yet another device that needs this quirk. Reported-by: Tanguy de Baritault Signed-off-by: Adel Gadllah Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e23ab8b30626da..7c863738e41996 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -299,6 +299,7 @@ #define USB_VENDOR_ID_ELAN 0x04f3 #define USB_DEVICE_ID_ELAN_TOUCHSCREEN 0x0089 #define USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B 0x009b +#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_0103 0x0103 #define USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F 0x016f #define USB_VENDOR_ID_ELECOM 0x056e diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 5014bb567b29cd..552671ee7c5d7c 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -72,6 +72,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_0103, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, From b8fff407a180286aa683d543d878d98d9fc57b13 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Nov 2014 13:57:46 +0100 Subject: [PATCH 022/138] mac80211: fix use-after-free in defragmentation Upon receiving the last fragment, all but the first fragment are freed, but the multicast check for statistics at the end of the function refers to the current skb (the last fragment) causing a use-after-free bug. Since multicast frames cannot be fragmented and we check for this early in the function, just modify that check to also do the accounting to fix the issue. Cc: stable@vger.kernel.org Reported-by: Yosef Khyal Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b04ca4049c95f2..a37f9af634cb6d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1678,11 +1678,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || - is_multicast_ether_addr(hdr->addr1))) { - /* not fragmented */ + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) + goto out; + + if (is_multicast_ether_addr(hdr->addr1)) { + rx->local->dot11MulticastReceivedFrameCount++; goto out; } + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -1775,10 +1778,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: if (rx->sta) rx->sta->rx_packets++; - if (is_multicast_ether_addr(hdr->addr1)) - rx->local->dot11MulticastReceivedFrameCount++; - else - ieee80211_led_rx(rx->local); + ieee80211_led_rx(rx->local); return RX_CONTINUE; } From 31b8b343e019e0a0c57ca9c13520a87f9cab884b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 2 Nov 2014 15:48:09 +0200 Subject: [PATCH 023/138] iwlwifi: fix RFkill while calibrating If the RFkill interrupt fires while we calibrate, it would make the firmware fail and the driver wasn't able to recover. Change the flow so that the driver will kill the firmware in that case. Since we have now two flows that are calling trans_stop_device (the RFkill interrupt and the op_mode_mvm_start function) - we need to better sync this. Use the STATUS_DEVICE_ENABLED in the pcie transport in an atomic way to achieve this. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=86231 CC: [3.10+] Reviewed-by: Johannes Berg Reviewed-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/fw.c | 10 +++++++++- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 1 + drivers/net/wireless/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/iwlwifi/mvm/ops.c | 11 ++++++++++- drivers/net/wireless/iwlwifi/pcie/trans.c | 4 ++-- 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c index e0d9f19650b07e..eb03943f846326 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/iwlwifi/mvm/fw.c @@ -284,7 +284,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) lockdep_assert_held(&mvm->mutex); - if (WARN_ON_ONCE(mvm->init_ucode_complete)) + if (WARN_ON_ONCE(mvm->init_ucode_complete || mvm->calibrating)) return 0; iwl_init_notification_wait(&mvm->notif_wait, @@ -334,6 +334,8 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) goto out; } + mvm->calibrating = true; + /* Send TX valid antennas before triggering calibrations */ ret = iwl_send_tx_ant_cfg(mvm, mvm->fw->valid_tx_ant); if (ret) @@ -358,11 +360,17 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) MVM_UCODE_CALIB_TIMEOUT); if (!ret) mvm->init_ucode_complete = true; + + if (ret && iwl_mvm_is_radio_killed(mvm)) { + IWL_DEBUG_RF_KILL(mvm, "RFKILL while calibrating.\n"); + ret = 1; + } goto out; error: iwl_remove_notification(&mvm->notif_wait, &calib_wait); out: + mvm->calibrating = false; if (iwlmvm_mod_params.init_dbg && !mvm->nvm_data) { /* we want to debug INIT and we have no NVM - fake */ mvm->nvm_data = kzalloc(sizeof(struct iwl_nvm_data) + diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 585fe5b7100fb8..b62405865b25cd 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -788,6 +788,7 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm) mvm->scan_status = IWL_MVM_SCAN_NONE; mvm->ps_disabled = false; + mvm->calibrating = false; /* just in case one was running */ ieee80211_remain_on_channel_expired(mvm->hw); diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index b153ced7015bfe..845429c88cf403 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -548,6 +548,7 @@ struct iwl_mvm { enum iwl_ucode_type cur_ucode; bool ucode_loaded; bool init_ucode_complete; + bool calibrating; u32 error_event_table; u32 log_event_table; u32 umac_error_event_table; diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 27fb0a1ef274b6..5b719ee8e78907 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -753,6 +753,7 @@ void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state) static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state) { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + bool calibrating = ACCESS_ONCE(mvm->calibrating); if (state) set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); @@ -761,7 +762,15 @@ static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state) wiphy_rfkill_set_hw_state(mvm->hw->wiphy, iwl_mvm_is_radio_killed(mvm)); - return state && mvm->cur_ucode != IWL_UCODE_INIT; + /* iwl_run_init_mvm_ucode is waiting for results, abort it */ + if (calibrating) + iwl_abort_notification_waits(&mvm->notif_wait); + + /* + * Stop the device if we run OPERATIONAL firmware or if we are in the + * middle of the calibrations. + */ + return state && (mvm->cur_ucode != IWL_UCODE_INIT || calibrating); } static void iwl_mvm_free_skb(struct iwl_op_mode *op_mode, struct sk_buff *skb) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 3781b029e54a32..160c3ebc48d0b2 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -915,7 +915,8 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans) * restart. So don't process again if the device is * already dead. */ - if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) { + if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) { + IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n"); iwl_pcie_tx_stop(trans); iwl_pcie_rx_stop(trans); @@ -945,7 +946,6 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans) /* clear all status bits */ clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status); clear_bit(STATUS_INT_ENABLED, &trans->status); - clear_bit(STATUS_DEVICE_ENABLED, &trans->status); clear_bit(STATUS_TPOWER_PMI, &trans->status); clear_bit(STATUS_RFKILL, &trans->status); From c1207c049b204b0a96535dc5416aee331b51e0e1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 2 Nov 2014 18:19:15 -0800 Subject: [PATCH 024/138] netfilter: nft_reject_bridge: Fix powerpc build error Fix: net/bridge/netfilter/nft_reject_bridge.c: In function 'nft_reject_br_send_v6_unreach': net/bridge/netfilter/nft_reject_bridge.c:240:3: error: implicit declaration of function 'csum_ipv6_magic' csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, ^ make[3]: *** [net/bridge/netfilter/nft_reject_bridge.o] Error 1 Seen with powerpc:allmodconfig. Fixes: 523b929d5446 ("netfilter: nft_reject_bridge: don't use IP stack to reject traffic") Cc: Pablo Neira Ayuso Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- net/bridge/netfilter/nft_reject_bridge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 654c9018e3e75a..48da2c54a69e36 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "../br_private.h" From 7071cf7fc435ab84df872613f613a9a055964fc1 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 2 Nov 2014 11:31:41 -0800 Subject: [PATCH 025/138] uapi: add missing network related headers to kbuild The makefile for sanitizing kernel headers uses the kbuild file to determine which files to do. Several networking related headers were missing. Without these headers iproute2 build would break. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index b70237e8bc37b4..4c94f31a8c99b6 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -125,6 +125,7 @@ header-y += filter.h header-y += firewire-cdev.h header-y += firewire-constants.h header-y += flat.h +header-y += fou.h header-y += fs.h header-y += fsl_hypervisor.h header-y += fuse.h @@ -141,6 +142,7 @@ header-y += hid.h header-y += hiddev.h header-y += hidraw.h header-y += hpet.h +header-y += hsr_netlink.h header-y += hyperv.h header-y += hysdn_if.h header-y += i2c-dev.h @@ -251,6 +253,7 @@ header-y += mii.h header-y += minix_fs.h header-y += mman.h header-y += mmtimer.h +header-y += mpls.h header-y += mqueue.h header-y += mroute.h header-y += mroute6.h @@ -424,6 +427,7 @@ header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h header-y += virtio_rng.h +header=y += vm_sockets.h header-y += vt.h header-y += wait.h header-y += wanrouter.h From a8f9bfdf982e2b1fb9f094e4de9ab08c57f3d2fd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Nov 2014 04:30:13 +0800 Subject: [PATCH 026/138] tun: Fix csum_start with VLAN acceleration When VLAN acceleration is in use on the xmit path, we end up setting csum_start to the wrong place. The result is that the whoever ends up doing the checksum setting will corrupt the packet instead of writing the checksum to the expected location, usually this means writing the checksum with an offset of -4. This patch fixes this by adjusting csum_start when VLAN acceleration is detected. Fixes: 6680ec68eff4 ("tuntap: hardware vlan tx support") Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/tun.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7302398f0b1fff..57e6bf75a6325a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1235,6 +1235,10 @@ static ssize_t tun_put_user(struct tun_struct *tun, struct tun_pi pi = { 0, skb->protocol }; ssize_t total = 0; int vlan_offset = 0, copied; + int vlan_hlen = 0; + + if (vlan_tx_tag_present(skb)) + vlan_hlen = VLAN_HLEN; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) < 0) @@ -1284,7 +1288,8 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (skb->ip_summed == CHECKSUM_PARTIAL) { gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - gso.csum_start = skb_checksum_start_offset(skb); + gso.csum_start = skb_checksum_start_offset(skb) + + vlan_hlen; gso.csum_offset = skb->csum_offset; } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { gso.flags = VIRTIO_NET_HDR_F_DATA_VALID; @@ -1297,10 +1302,9 @@ static ssize_t tun_put_user(struct tun_struct *tun, } copied = total; - total += skb->len; - if (!vlan_tx_tag_present(skb)) { - len = min_t(int, skb->len, len); - } else { + len = min_t(int, skb->len + vlan_hlen, len); + total += skb->len + vlan_hlen; + if (vlan_hlen) { int copy, ret; struct { __be16 h_vlan_proto; @@ -1311,8 +1315,6 @@ static ssize_t tun_put_user(struct tun_struct *tun, veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); - len = min_t(int, skb->len + VLAN_HLEN, len); - total += VLAN_HLEN; copy = min_t(int, vlan_offset, len); ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy); From 2eb783c43e7cf807a45899c10ed556b6dc116625 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Nov 2014 04:30:14 +0800 Subject: [PATCH 027/138] tun: Fix TUN_PKT_STRIP setting We set the flag TUN_PKT_STRIP if the user buffer provided is too small to contain the entire packet plus meta-data. However, this has been broken ever since we added GSO meta-data. VLAN acceleration also has the same problem. This patch fixes this by taking both into account when setting the TUN_PKT_STRIP flag. The fact that this has been broken for six years without anyone realising means that nobody actually uses this flag. Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/tun.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 57e6bf75a6325a..9dd3746994a42c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1236,15 +1236,19 @@ static ssize_t tun_put_user(struct tun_struct *tun, ssize_t total = 0; int vlan_offset = 0, copied; int vlan_hlen = 0; + int vnet_hdr_sz = 0; if (vlan_tx_tag_present(skb)) vlan_hlen = VLAN_HLEN; + if (tun->flags & TUN_VNET_HDR) + vnet_hdr_sz = tun->vnet_hdr_sz; + if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) < 0) return -EINVAL; - if (len < skb->len) { + if (len < skb->len + vlan_hlen + vnet_hdr_sz) { /* Packet will be striped */ pi.flags |= TUN_PKT_STRIP; } @@ -1254,9 +1258,9 @@ static ssize_t tun_put_user(struct tun_struct *tun, total += sizeof(pi); } - if (tun->flags & TUN_VNET_HDR) { + if (vnet_hdr_sz) { struct virtio_net_hdr gso = { 0 }; /* no info leak */ - if ((len -= tun->vnet_hdr_sz) < 0) + if ((len -= vnet_hdr_sz) < 0) return -EINVAL; if (skb_is_gso(skb)) { @@ -1298,7 +1302,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total, sizeof(gso)))) return -EFAULT; - total += tun->vnet_hdr_sz; + total += vnet_hdr_sz; } copied = total; From f4c4a4e068a3f43b84bc1bcb9beb295dd5ff2529 Mon Sep 17 00:00:00 2001 From: Nimrod Andy Date: Mon, 3 Nov 2014 13:26:50 +0800 Subject: [PATCH 028/138] net: fec: fix suspend broken on multiple MACs sillicons On i.MX6SX sdb platform, there has two same enet MACs, after system up, just eth0 is up, and then do suspend/resume test: [ 50.437967] PM: Syncing filesystems ... done. [ 50.476924] Freezing user space processes ... (elapsed 0.005 seconds) done. [ 50.490093] Freezing remaining freezable tasks ... (elapsed 0.004 seconds) done. [ 50.559771] ------------[ cut here ]------------ [ 50.564453] WARNING: CPU: 0 PID: 575 at drivers/clk/clk.c:851 __clk_disable+0x60/0x6c() [ 50.572475] Modules linked in: [ 50.575578] CPU: 0 PID: 575 Comm: sh Not tainted 3.18.0-rc2-next-20141031-00007-gf61135b #21 [ 50.584031] Backtrace: [ 50.586550] [<80011ecc>] (dump_backtrace) from [<8001206c>] (show_stack+0x18/0x1c) [ 50.594136] r6:808a7a54 r5:00000000 r4:00000000 r3:00000000 [ 50.599920] [<80012054>] (show_stack) from [<806ab3c0>] (dump_stack+0x80/0x9c) [ 50.607187] [<806ab340>] (dump_stack) from [<8002a3e8>] (warn_slowpath_common+0x6c/0x8c) [ 50.615294] r5:00000353 r4:00000000 [ 50.618940] [<8002a37c>] (warn_slowpath_common) from [<8002a42c>] (warn_slowpath_null+0x24/0x2c) [ 50.627738] r8:00000000 r7:be144c44 r6:be015600 r5:80070013 r4:be015600 [ 50.634573] [<8002a408>] (warn_slowpath_null) from [<804f8d4c>] (__clk_disable+0x60/0x6c) [ 50.642777] [<804f8cec>] (__clk_disable) from [<804f8e5c>] (clk_disable+0x2c/0x38) [ 50.650359] r4:be015600 r3:00000000 [ 50.654006] [<804f8e30>] (clk_disable) from [<80420ab4>] (fec_enet_clk_enable+0xc4/0x258) [ 50.662196] r5:be3cb620 r4:be3cb000 [ 50.665838] [<804209f0>] (fec_enet_clk_enable) from [<80421178>] (fec_suspend+0x30/0x180) [ 50.674026] r7:be144c44 r6:be144c10 r5:8037f5a4 r4:be3cb000 [ 50.679802] [<80421148>] (fec_suspend) from [<8037f5d8>] (platform_pm_suspend+0x34/0x64) [ 50.687906] r10:00000000 r9:00000000 r8:00000000 r7:be144c44 r6:be144c10 r5:8037f5a4 [ 50.695852] r4:be144c10 r3:80421148 [ 50.699511] [<8037f5a4>] (platform_pm_suspend) from [<8038784c>] (dpm_run_callback.isra.14+0x34/0x6c) [ 50.708764] [<80387818>] (dpm_run_callback.isra.14) from [<80387f00>] (__device_suspend+0x12c/0x2a4) [ 50.717909] r9:8098ec8c r8:80973bec r6:00000002 r5:811c7038 r4:be144c10 [ 50.724746] [<80387dd4>] (__device_suspend) from [<803894fc>] (dpm_suspend+0x64/0x224) [ 50.732675] r8:80973bec r7:be144c10 r6:8098ec24 r5:811c7038 r4:be144cc4 [ 50.739509] [<80389498>] (dpm_suspend) from [<8038999c>] (dpm_suspend_start+0x60/0x68) [ 50.747438] r10:8082fa24 r9:00000000 r8:00000004 r7:00000003 r6:00000000 r5:8116ec80 [ 50.755386] r4:00000002 [ 50.757969] [<8038993c>] (dpm_suspend_start) from [<800679d8>] (suspend_devices_and_enter+0x90/0x3ec) [ 50.767202] r4:00000003 r3:8116eca0 [ 50.770843] [<80067948>] (suspend_devices_and_enter) from [<80067f40>] (pm_suspend+0x20c/0x2a4) [ 50.779553] r8:00000004 r7:00000003 r6:00000000 r5:8116ec8c r4:00000003 [ 50.786394] [<80067d34>] (pm_suspend) from [<80066858>] (state_store+0x70/0xc0) [ 50.793718] r6:8116ec90 r5:00000003 r4:bd88a800 r3:0000006d [ 50.799496] [<800667e8>] (state_store) from [<802b0384>] (kobj_attr_store+0x1c/0x28) [ 50.807251] r10:bd399f78 r8:00000000 r7:bd88a800 r6:bd88a800 r5:00000004 r4:bd085680 [ 50.815219] [<802b0368>] (kobj_attr_store) from [<80153090>] (sysfs_kf_write+0x54/0x58) [ 50.823252] [<8015303c>] (sysfs_kf_write) from [<80151fd8>] (kernfs_fop_write+0xd0/0x194) [ 50.831441] r6:00000004 r5:bd08568c r4:bd085680 r3:8015303c [ 50.837220] [<80151f08>] (kernfs_fop_write) from [<800eddb4>] (vfs_write+0xb8/0x1a8) [ 50.844975] r10:00000000 r9:00000000 r8:00000000 r7:bd399f78 r6:01336408 r5:00000004 [ 50.852924] r4:bc584dc0 [ 50.855505] [<800edcfc>] (vfs_write) from [<800ee0b8>] (SyS_write+0x48/0x88) [ 50.862567] r10:00000000 r8:00000000 r7:01336408 r6:00000004 r5:bc584dc0 r4:bc584dc0 [ 50.870537] [<800ee070>] (SyS_write) from [<8000eb00>] (ret_fast_syscall+0x0/0x48) [ 50.878120] r9:bd398000 r8:8000ecc4 r7:00000004 r6:76f42b48 r5:01336408 r4:00000004 [ 50.885983] ---[ end trace 7545115d752a316a ]--- [ 50.890765] ------------[ cut here ]------------ The root cause is that eth1 is not opened and clock is not enabled, and .suspend() still call .fec_enet_clk_enable() to disable clock. To avoid the broken, let it check network device up status by calling .netif_running() before disable/enable clocks. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 50a851db2852e2..c27128de8dde5c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3343,12 +3343,11 @@ static int __maybe_unused fec_suspend(struct device *dev) netif_device_detach(ndev); netif_tx_unlock_bh(ndev); fec_stop(ndev); + fec_enet_clk_enable(ndev, false); + pinctrl_pm_select_sleep_state(&fep->pdev->dev); } rtnl_unlock(); - fec_enet_clk_enable(ndev, false); - pinctrl_pm_select_sleep_state(&fep->pdev->dev); - if (fep->reg_phy) regulator_disable(fep->reg_phy); @@ -3367,13 +3366,14 @@ static int __maybe_unused fec_resume(struct device *dev) return ret; } - pinctrl_pm_select_default_state(&fep->pdev->dev); - ret = fec_enet_clk_enable(ndev, true); - if (ret) - goto failed_clk; - rtnl_lock(); if (netif_running(ndev)) { + pinctrl_pm_select_default_state(&fep->pdev->dev); + ret = fec_enet_clk_enable(ndev, true); + if (ret) { + rtnl_unlock(); + goto failed_clk; + } fec_restart(ndev); netif_tx_lock_bh(ndev); netif_device_attach(ndev); From 3ce9b20f1971690b8b3b620e735ec99431573b39 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Nov 2014 14:01:25 +0800 Subject: [PATCH 029/138] macvtap: Fix csum_start when VLAN tags are present When VLAN is in use in macvtap_put_user, we end up setting csum_start to the wrong place. The result is that the whoever ends up doing the checksum setting will corrupt the packet instead of writing the checksum to the expected location, usually this means writing the checksum with an offset of -4. This patch fixes this by adjusting csum_start when VLAN tags are detected. Fixes: f09e2249c4f5 ("macvtap: restore vlan header on user read") Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Cheers, Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 6f226de655a407..880cc090dc44ec 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -629,6 +629,8 @@ static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_PARTIAL) { vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; vnet_hdr->csum_start = skb_checksum_start_offset(skb); + if (vlan_tx_tag_present(skb)) + vnet_hdr->csum_start += VLAN_HLEN; vnet_hdr->csum_offset = skb->csum_offset; } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; From 6c6151daaf2d8dc2046d9926539feed5f66bf74e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:27 +0100 Subject: [PATCH 030/138] ip6_tunnel: Use ip6_tnl_dev_init as the ndo_init function. ip6_tnl_dev_init() sets the dev->iflink via a call to ip6_tnl_link_config(). After that, register_netdevice() sets dev->iflink = -1. So we loose the iflink configuration for ipv6 tunnels. Fix this by using ip6_tnl_dev_init() as the ndo_init function. Then ip6_tnl_dev_init() is called after dev->iflink is set to -1 from register_netdevice(). Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9409887fb664dd..9cb94cfa0ae71d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -272,9 +272,6 @@ static int ip6_tnl_create2(struct net_device *dev) int err; t = netdev_priv(dev); - err = ip6_tnl_dev_init(dev); - if (err < 0) - goto out; err = register_netdevice(dev); if (err < 0) @@ -1462,6 +1459,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, @@ -1546,16 +1544,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - int err = ip6_tnl_dev_init_gen(dev); - - if (err) - return err; t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - ip6_tnl_link_config(t); - rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } From 16a0231bf7dc3fb37e9b1f1cb1a277dc220b5c5e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:28 +0100 Subject: [PATCH 031/138] vti6: Use vti6_dev_init as the ndo_init function. vti6_dev_init() sets the dev->iflink via a call to vti6_link_config(). After that, register_netdevice() sets dev->iflink = -1. So we loose the iflink configuration for vti6 tunnels. Fix this by using vti6_dev_init() as the ndo_init function. Then vti6_dev_init() is called after dev->iflink is set to -1 from register_netdevice(). Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d440bb585524d7..31089d153fd332 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -172,10 +172,6 @@ static int vti6_tnl_create2(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id); int err; - err = vti6_dev_init(dev); - if (err < 0) - goto out; - err = register_netdevice(dev); if (err < 0) goto out; @@ -783,6 +779,7 @@ static int vti6_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops vti6_netdev_ops = { + .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, @@ -852,16 +849,10 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); - int err = vti6_dev_init_gen(dev); - - if (err) - return err; t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - vti6_link_config(t); - rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } From ebe084aafb7e93adf210e80043c9f69adf56820d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:29 +0100 Subject: [PATCH 032/138] sit: Use ipip6_tunnel_init as the ndo_init function. ipip6_tunnel_init() sets the dev->iflink via a call to ipip6_tunnel_bind_dev(). After that, register_netdevice() sets dev->iflink = -1. So we loose the iflink configuration for ipv6 tunnels. Fix this by using ipip6_tunnel_init() as the ndo_init function. Then ipip6_tunnel_init() is called after dev->iflink is set to -1 from register_netdevice(). Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/sit.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 58e5b4710127a9..a24557a1c1d8b6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -195,10 +195,8 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - err = ipip6_tunnel_init(dev); - if (err < 0) - goto out; - ipip6_tunnel_clone_6rd(dev, sitn); + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; @@ -207,7 +205,8 @@ static int ipip6_tunnel_create(struct net_device *dev) if (err < 0) goto out; - strcpy(t->parms.name, dev->name); + ipip6_tunnel_clone_6rd(dev, sitn); + dev->rtnl_link_ops = &sit_link_ops; dev_hold(dev); @@ -1330,6 +1329,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops ipip6_netdev_ops = { + .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, @@ -1378,9 +1378,7 @@ static int ipip6_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); @@ -1405,7 +1403,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - strcpy(tunnel->parms.name, dev->name); iph->version = 4; iph->protocol = IPPROTO_IPV6; From f03eb128e3f4276f46442d14f3b8f864f3775821 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:30 +0100 Subject: [PATCH 033/138] gre6: Move the setting of dev->iflink into the ndo_init functions. Otherwise it gets overwritten by register_netdev(). Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 12c3c8ef384946..4564e1fca3eb42 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -961,8 +961,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) else dev->flags &= ~IFF_POINTOPOINT; - dev->iflink = p->link; - /* Precalculate GRE options length */ if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { if (t->parms.o_flags&GRE_CSUM) @@ -1272,6 +1270,7 @@ static int ip6gre_tunnel_init(struct net_device *dev) u64_stats_init(&ip6gre_tunnel_stats->syncp); } + dev->iflink = tunnel->parms.link; return 0; } @@ -1481,6 +1480,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + dev->iflink = tunnel->parms.link; + return 0; } From 9fd3d3a4307283a1d85d9a383223055954b7135f Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 3 Nov 2014 14:14:35 +0000 Subject: [PATCH 034/138] sfc: don't BUG_ON efx->max_channels == 0 in probe efx_ef10_probe() was BUGging out if the BAR2 size was 0. This is unnecessarily violent; instead we should just fail to probe the device. Kept a WARN_ON as this problem indicates a broken or misconfigured NIC. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 002d4cdc319fda..a77f05ce832596 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -180,7 +180,8 @@ static int efx_ef10_probe(struct efx_nic *efx) EFX_MAX_CHANNELS, resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]) / (EFX_VI_PAGE_SIZE * EFX_TXQ_TYPES)); - BUG_ON(efx->max_channels == 0); + if (WARN_ON(efx->max_channels == 0)) + return -EIO; nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL); if (!nic_data) From 9c5c6ed7b5078ba42b1c769a1c29b3ae4a6bee36 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 3 Nov 2014 15:33:32 -0500 Subject: [PATCH 035/138] HID: core: cleanup .claimed field on disconnect When a subdriver is rmmod-ed then re-insmod-ed, the hid device is not destroyed as it is owned by the transport layer. So when we re-probed the device, the hid device is assumed to be already claimed, and can lead to page faults if hid-core tries to forward the emitted data to the to-be-created claimed node. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 73bd9e2e42bc3c..3402033fa52a72 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1659,6 +1659,7 @@ void hid_disconnect(struct hid_device *hdev) hdev->hiddev_disconnect(hdev); if (hdev->claimed & HID_CLAIMED_HIDRAW) hidraw_disconnect(hdev); + hdev->claimed = 0; } EXPORT_SYMBOL_GPL(hid_disconnect); From 9a23c1d6f0f5dbac4c9b73fa6cea7c9ee3d29074 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 3 Nov 2014 09:56:11 +0100 Subject: [PATCH 036/138] ahci: fix AHCI parameters not taken into account Changes into the AHCI subsystem have introduced a bug by not taking into account the force_port_map and mask_port_map parameters when using the ahci_pci_save_initial_config function. This commit fixes it by setting the internal parameters of the ahci_port_priv structure. Fixes: 725c7b570fda Reported-and-tested-by: Zlatko Calusic Signed-off-by: Antoine Tenart --- drivers/ata/ahci.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 89c08113411285..e45f83789809a2 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -527,12 +527,9 @@ MODULE_PARM_DESC(marvell_enable, "Marvell SATA via AHCI (1 = enabled)"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - unsigned int force_port_map = 0; - unsigned int mask_port_map = 0; - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); - force_port_map = 1; + hpriv->force_port_map = 1; } /* @@ -542,9 +539,9 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, */ if (hpriv->flags & AHCI_HFLAG_MV_PATA) { if (pdev->device == 0x6121) - mask_port_map = 0x3; + hpriv->mask_port_map = 0x3; else - mask_port_map = 0xf; + hpriv->mask_port_map = 0xf; dev_info(&pdev->dev, "Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n"); } From f3af020b9a8d298022b811a19719df0cf461efa5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Nov 2014 13:52:27 -0500 Subject: [PATCH 037/138] blk-mq: make mq_queue_reinit_notify() freeze queues in parallel q->mq_usage_counter is a percpu_ref which is killed and drained when the queue is frozen. On a CPU hotplug event, blk_mq_queue_reinit() which involves freezing the queue is invoked on all existing queues. Because percpu_ref killing and draining involve a RCU grace period, doing the above on one queue after another may take a long time if there are many queues on the system. This patch splits out initiation of freezing and waiting for its completion, and updates blk_mq_queue_reinit_notify() so that the queues are frozen in parallel instead of one after another. Note that freezing and unfreezing are moved from blk_mq_queue_reinit() to blk_mq_queue_reinit_notify(). Signed-off-by: Tejun Heo Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Signed-off-by: Jens Axboe --- block/blk-mq.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 68929bad9a6a40..1d016fc9a8b640 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -107,11 +107,7 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref) wake_up_all(&q->mq_freeze_wq); } -/* - * Guarantee no request is in use, so we can change any data structure of - * the queue afterward. - */ -void blk_mq_freeze_queue(struct request_queue *q) +static void blk_mq_freeze_queue_start(struct request_queue *q) { bool freeze; @@ -123,9 +119,23 @@ void blk_mq_freeze_queue(struct request_queue *q) percpu_ref_kill(&q->mq_usage_counter); blk_mq_run_queues(q, false); } +} + +static void blk_mq_freeze_queue_wait(struct request_queue *q) +{ wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); } +/* + * Guarantee no request is in use, so we can change any data structure of + * the queue afterward. + */ +void blk_mq_freeze_queue(struct request_queue *q) +{ + blk_mq_freeze_queue_start(q); + blk_mq_freeze_queue_wait(q); +} + static void blk_mq_unfreeze_queue(struct request_queue *q) { bool wake; @@ -1921,7 +1931,7 @@ void blk_mq_free_queue(struct request_queue *q) /* Basically redo blk_mq_init_queue with queue frozen */ static void blk_mq_queue_reinit(struct request_queue *q) { - blk_mq_freeze_queue(q); + WARN_ON_ONCE(!q->mq_freeze_depth); blk_mq_sysfs_unregister(q); @@ -1936,8 +1946,6 @@ static void blk_mq_queue_reinit(struct request_queue *q) blk_mq_map_swqueue(q); blk_mq_sysfs_register(q); - - blk_mq_unfreeze_queue(q); } static int blk_mq_queue_reinit_notify(struct notifier_block *nb, @@ -1956,8 +1964,25 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, return NOTIFY_OK; mutex_lock(&all_q_mutex); + + /* + * We need to freeze and reinit all existing queues. Freezing + * involves synchronous wait for an RCU grace period and doing it + * one by one may take a long time. Start freezing all queues in + * one swoop and then wait for the completions so that freezing can + * take place in parallel. + */ + list_for_each_entry(q, &all_q_list, all_q_node) + blk_mq_freeze_queue_start(q); + list_for_each_entry(q, &all_q_list, all_q_node) + blk_mq_freeze_queue_wait(q); + list_for_each_entry(q, &all_q_list, all_q_node) blk_mq_queue_reinit(q); + + list_for_each_entry(q, &all_q_list, all_q_node) + blk_mq_unfreeze_queue(q); + mutex_unlock(&all_q_mutex); return NOTIFY_OK; } From 09c9e0593d7215c809a4a47659b0e760112d656e Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 3 Nov 2014 11:59:54 -0800 Subject: [PATCH 038/138] dtb: xgene: fix: Backward compatibility with older firmware The following kernel crash was reported when using older firmware (<= 1.13.28). [ 0.980000] libphy: APM X-Gene MDIO bus: probed [ 1.130000] Unhandled fault: synchronous external abort (0x96000010) at 0xffffff800009a17c [ 1.140000] Internal error: : 96000010 [#1] SMP [ 1.140000] Modules linked in: [ 1.140000] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.17.0+ #21 [ 1.140000] task: ffffffc3f0110000 ti: ffffffc3f0064000 task.ti: ffffffc3f0064000 [ 1.140000] PC is at ioread32+0x58/0x68 [ 1.140000] LR is at xgene_enet_setup_ring+0x18c/0x1cc [ 1.140000] pc : [] lr : [] pstate: a0000045 [ 1.140000] sp : ffffffc3f0067b20 [ 1.140000] x29: ffffffc3f0067b20 x28: ffffffc000aa8ea0 [ 1.140000] x27: ffffffc000bb2000 x26: ffffffc000a64270 [ 1.140000] x25: ffffffc000b05ad8 x24: ffffffc0ff99ba58 [ 1.140000] x23: 0000000000004000 x22: 0000000000004000 [ 1.140000] x21: 0000000000000200 x20: 0000000000200000 [ 1.140000] x19: ffffffc0ff99ba18 x18: ffffffc0007a6000 [ 1.140000] x17: 0000000000000007 x16: 000000000000000e [ 1.140000] x15: 0000000000000001 x14: 0000000000000000 [ 1.140000] x13: ffffffbeedb71320 x12: 00000000ffffff80 [ 1.140000] x11: 0000000000000002 x10: 0000000000000000 [ 1.140000] x9 : 0000000000000000 x8 : ffffffc3eb2a4000 [ 1.140000] x7 : 0000000000000000 x6 : 0000000000000000 [ 1.140000] x5 : 0000000001080000 x4 : 000000007d654010 [ 1.140000] x3 : ffffffffffffffff x2 : 000000000003ffff [ 1.140000] x1 : ffffff800009a17c x0 : ffffff800009a17c The issue was that the older firmware does not support 10GbE and SGMII based 1GBE interfaces. This patch changes the address length of the reg property of sgmii0 and xgmii nodes and serves as preparatory patch for the fix. Signed-off-by: Iyappan Subramanian Signed-off-by: Keyur Chudgar Reported-by: Dann Frazier Signed-off-by: David S. Miller --- arch/arm64/boot/dts/apm-storm.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 295c72d52a1f72..f1ad9c2ab2e917 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -599,7 +599,7 @@ compatible = "apm,xgene-enet"; status = "disabled"; reg = <0x0 0x17020000 0x0 0xd100>, - <0x0 0X17030000 0x0 0X400>, + <0x0 0X17030000 0x0 0Xc300>, <0x0 0X10000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; interrupts = <0x0 0x3c 0x4>; @@ -624,9 +624,9 @@ sgenet0: ethernet@1f210000 { compatible = "apm,xgene-enet"; status = "disabled"; - reg = <0x0 0x1f210000 0x0 0x10000>, - <0x0 0x1f200000 0x0 0X10000>, - <0x0 0x1B000000 0x0 0X20000>; + reg = <0x0 0x1f210000 0x0 0xd100>, + <0x0 0x1f200000 0x0 0Xc300>, + <0x0 0x1B000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; interrupts = <0x0 0xA0 0x4>; dma-coherent; @@ -639,7 +639,7 @@ compatible = "apm,xgene-enet"; status = "disabled"; reg = <0x0 0x1f610000 0x0 0xd100>, - <0x0 0x1f600000 0x0 0X400>, + <0x0 0x1f600000 0x0 0Xc300>, <0x0 0x18000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; interrupts = <0x0 0x60 0x4>; From c3f4465d272fa94d5a077c502e83d3e712ec8d62 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 3 Nov 2014 11:59:55 -0800 Subject: [PATCH 039/138] drivers: net: xgene: Backward compatibility with older firmware This patch adds support when used with older firmware (<= 1.13.28). - Added xgene_ring_mgr_init() to check whether ring manager is initialized - Calling xgene_ring_mgr_init() from xgene_port_ops.reset() - To handle errors, changed the return type of xgene_port_ops.reset() Signed-off-by: Iyappan Subramanian Signed-off-by: Keyur Chudgar Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 18 +++++++++++++++++- drivers/net/ethernet/apm/xgene/xgene_enet_hw.h | 4 ++++ .../net/ethernet/apm/xgene/xgene_enet_main.c | 5 ++++- .../net/ethernet/apm/xgene/xgene_enet_main.h | 2 +- .../net/ethernet/apm/xgene/xgene_enet_sgmac.c | 7 ++++++- .../net/ethernet/apm/xgene/xgene_enet_xgmac.c | 7 ++++++- 6 files changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 63ea1941e973b6..7ba83ffb08ac73 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -575,10 +575,24 @@ static void xgene_gmac_tx_disable(struct xgene_enet_pdata *pdata) xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data & ~TX_EN); } -static void xgene_enet_reset(struct xgene_enet_pdata *pdata) +bool xgene_ring_mgr_init(struct xgene_enet_pdata *p) +{ + if (!ioread32(p->ring_csr_addr + CLKEN_ADDR)) + return false; + + if (ioread32(p->ring_csr_addr + SRST_ADDR)) + return false; + + return true; +} + +static int xgene_enet_reset(struct xgene_enet_pdata *pdata) { u32 val; + if (!xgene_ring_mgr_init(pdata)) + return -ENODEV; + clk_prepare_enable(pdata->clk); clk_disable_unprepare(pdata->clk); clk_prepare_enable(pdata->clk); @@ -590,6 +604,8 @@ static void xgene_enet_reset(struct xgene_enet_pdata *pdata) val |= SCAN_AUTO_INCR; MGMT_CLOCK_SEL_SET(&val, 1); xgene_enet_wr_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, val); + + return 0; } static void xgene_gport_shutdown(struct xgene_enet_pdata *pdata) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 38558584080ed8..ec45f3256f0e3d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -104,6 +104,9 @@ enum xgene_enet_rm { #define BLOCK_ETH_MAC_OFFSET 0x0000 #define BLOCK_ETH_MAC_CSR_OFFSET 0x2800 +#define CLKEN_ADDR 0xc208 +#define SRST_ADDR 0xc200 + #define MAC_ADDR_REG_OFFSET 0x00 #define MAC_COMMAND_REG_OFFSET 0x04 #define MAC_WRITE_REG_OFFSET 0x08 @@ -318,6 +321,7 @@ void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata); void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata); +bool xgene_ring_mgr_init(struct xgene_enet_pdata *p); extern struct xgene_mac_ops xgene_gmac_ops; extern struct xgene_port_ops xgene_gport_ops; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 3c208cc6f6bb47..cc3f9559a19687 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -852,7 +852,9 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) u16 dst_ring_num; int ret; - pdata->port_ops->reset(pdata); + ret = pdata->port_ops->reset(pdata); + if (ret) + return ret; ret = xgene_enet_create_desc_rings(ndev); if (ret) { @@ -954,6 +956,7 @@ static int xgene_enet_probe(struct platform_device *pdev) return ret; err: + unregister_netdev(ndev); free_netdev(ndev); return ret; } diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 874e5a01161fb9..dba647d357400a 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -83,7 +83,7 @@ struct xgene_mac_ops { }; struct xgene_port_ops { - void (*reset)(struct xgene_enet_pdata *pdata); + int (*reset)(struct xgene_enet_pdata *pdata); void (*cle_bypass)(struct xgene_enet_pdata *pdata, u32 dst_ring_num, u16 bufpool_id); void (*shutdown)(struct xgene_enet_pdata *pdata); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index c22f32622fa9a5..f5d4f68c288c39 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -311,14 +311,19 @@ static void xgene_sgmac_tx_disable(struct xgene_enet_pdata *p) xgene_sgmac_rxtx(p, TX_EN, false); } -static void xgene_enet_reset(struct xgene_enet_pdata *p) +static int xgene_enet_reset(struct xgene_enet_pdata *p) { + if (!xgene_ring_mgr_init(p)) + return -ENODEV; + clk_prepare_enable(p->clk); clk_disable_unprepare(p->clk); clk_prepare_enable(p->clk); xgene_enet_ecc_init(p); xgene_enet_config_ring_if_assoc(p); + + return 0; } static void xgene_enet_cle_bypass(struct xgene_enet_pdata *p, diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 67d07206b3c772..a18a9d1f11432d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -252,14 +252,19 @@ static void xgene_xgmac_tx_disable(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data & ~HSTTFEN); } -static void xgene_enet_reset(struct xgene_enet_pdata *pdata) +static int xgene_enet_reset(struct xgene_enet_pdata *pdata) { + if (!xgene_ring_mgr_init(pdata)) + return -ENODEV; + clk_prepare_enable(pdata->clk); clk_disable_unprepare(pdata->clk); clk_prepare_enable(pdata->clk); xgene_enet_ecc_init(pdata); xgene_enet_config_ring_if_assoc(pdata); + + return 0; } static void xgene_enet_xgcle_bypass(struct xgene_enet_pdata *pdata, From bdd330f0506b2c4d02d5453fa32e785f0c348388 Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 3 Nov 2014 11:59:56 -0800 Subject: [PATCH 040/138] drivers: net: xgene: fix: Use separate resources This patch fixes the following kernel crash during SGMII based 1GbE probe. BUG: Bad page state in process swapper/0 pfn:40fe6ad page:ffffffbee37a75d8 count:-1 mapcount:0 mapping: (null) index:0x0 flags: 0x0() page dumped because: nonzero _count Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.17.0+ #7 Call trace: [] dump_backtrace+0x0/0x12c [] show_stack+0x10/0x1c [] dump_stack+0x74/0xc4 [] bad_page+0xd8/0x128 [] get_page_from_freelist+0x4b8/0x640 [] __alloc_pages_nodemask+0xd8/0x834 [] __netdev_alloc_frag+0x124/0x1b8 [] __netdev_alloc_skb+0x90/0x10c [] xgene_enet_refill_bufpool+0x11c/0x280 [] xgene_enet_process_ring+0x168/0x340 [] xgene_enet_napi+0x1c/0x50 [] net_rx_action+0xc8/0x18c [] __do_softirq+0x114/0x24c [] irq_exit+0x94/0xc8 [] __handle_domain_irq+0x8c/0xf4 [] gic_handle_irq+0x30/0x7c This was due to hardware resource sharing conflict with the firmware. This patch fixes this crash by using resources (descriptor ring, prefetch buffer) that are not shared. Signed-off-by: Iyappan Subramanian Signed-off-by: Keyur Chudgar Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 6 +++--- drivers/net/ethernet/apm/xgene/xgene_enet_main.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index cc3f9559a19687..123669696184fd 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -639,9 +639,9 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) struct device *dev = ndev_to_dev(ndev); struct xgene_enet_desc_ring *rx_ring, *tx_ring, *cp_ring; struct xgene_enet_desc_ring *buf_pool = NULL; - u8 cpu_bufnum = 0, eth_bufnum = 0; - u8 bp_bufnum = 0x20; - u16 ring_id, ring_num = 0; + u8 cpu_bufnum = 0, eth_bufnum = START_ETH_BUFNUM; + u8 bp_bufnum = START_BP_BUFNUM; + u16 ring_id, ring_num = START_RING_NUM; int ret; /* allocate rx descriptor ring */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index dba647d357400a..f9958fae6ffdc9 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -38,6 +38,9 @@ #define SKB_BUFFER_SIZE (XGENE_ENET_MAX_MTU - NET_IP_ALIGN) #define NUM_PKT_BUF 64 #define NUM_BUFPOOL 32 +#define START_ETH_BUFNUM 2 +#define START_BP_BUFNUM 0x22 +#define START_RING_NUM 8 #define PHY_POLL_LINK_ON (10 * HZ) #define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) From 7179621023011f23f636b3e9fcc97c41aa9d6823 Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Fri, 31 Oct 2014 18:09:33 +0530 Subject: [PATCH 041/138] cpufreq: cpufreq-dt: Fix arguments in clock failure error message Fix the swapped arguments in the clock failure dev_err. Signed-off-by: Abhilash Kesavan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 23aaf40cf37f5b..f657c571b18e4e 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -166,8 +166,8 @@ static int allocate_resources(int cpu, struct device **cdev, if (ret == -EPROBE_DEFER) dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu); else - dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", ret, - cpu); + dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu, + ret); } else { *cdev = cpu_dev; *creg = cpu_reg; From 45cac46e51da75628ac2a593c70f5144abb9b31d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 3 Nov 2014 19:38:37 -0800 Subject: [PATCH 042/138] geneve: Set GSO type on transmit. Geneve does not currently set the inner protocol type when transmitting packets. This causes GSO segmentation to fail on NICs that do not support Geneve offloading. CC: Andy Zhou Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 065cd94c640c0c..6e5266cf403dae 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -144,6 +144,8 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet); } From d3ca9eafc0ed97b8f56fdf23655cfece89c48354 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 3 Nov 2014 19:38:38 -0800 Subject: [PATCH 043/138] geneve: Unregister pernet subsys on module unload. The pernet ops aren't ever unregistered, which causes a memory leak and an OOPs if the module is ever reinserted. Fixes: 0b5e8b8eeae4 ("net: Add Geneve tunneling protocol driver") CC: Andy Zhou Signed-off-by: Jesse Gross Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 6e5266cf403dae..dedb21e9991438 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -366,6 +366,7 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { destroy_workqueue(geneve_wq); + unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); From 219b5f29a570b94151533e701cc3a504efa86ef3 Mon Sep 17 00:00:00 2001 From: Loganaden Velvindron Date: Tue, 4 Nov 2014 03:02:49 -0800 Subject: [PATCH 044/138] net: Add missing descriptions for fwmark_reflect for ipv4 and ipv6. It was initially sent by Lorenzo Colitti, but was subsequently lost in the final diff he submitted. Signed-off-by: Loganaden Velvindron Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 0307e2875f2159..a476b08a43e0da 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -56,6 +56,13 @@ ip_forward_use_pmtu - BOOLEAN 0 - disabled 1 - enabled +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv4 reply packets that are not + associated with a socket for example, TCP RSTs or ICMP echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. @@ -1201,6 +1208,13 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv6 reply packets that are not + associated with a socket for example, TCP RSTs or ICMPv6 echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + conf/interface/*: Change special settings per interface. From 9cdb5dbf79f4e8f43e19ab7f4ec9ed74c146f0af Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 5 Nov 2014 21:44:27 +0100 Subject: [PATCH 045/138] include/linux/socket.h: Fix comment File descriptors are always closed on exit :-) Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index ec538fc287a660..bb9b83640070f4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -256,7 +256,7 @@ struct ucred { #define MSG_EOF MSG_FIN #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ -#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through SCM_RIGHTS */ #if defined(CONFIG_COMPAT) From b994ca6b67ade73c2d69c00a449ad7006cd4546e Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Tue, 4 Nov 2014 22:43:17 +0800 Subject: [PATCH 046/138] drivers: net: ethernet: xilinx: xilinx_emaclite: revert the original commit "1db3ddff1602edf2390b7667dcbaa0f71512e3ea" Microblaze is a fpga soft core, it can be customized easily, which may cause many various hardware version strings. So the original fix patch based on hard-coded compatible version strings is not a good idea (although it is correct for current issue). For it, there will be a new solving way soon (which based on the device tree). The original issue is related with qemu, so can only change the hardware version string in qemu for it, then keep the original driver no touch ( qemu is for virtualization which has much easier life than real world). Signed-off-by: Chen Gang Acked-by: Michal Simek Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 298fad395d3a42..28dbbdc393ebf2 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1236,7 +1236,6 @@ static struct of_device_id xemaclite_of_match[] = { { .compatible = "xlnx,opb-ethernetlite-1.01.b", }, { .compatible = "xlnx,xps-ethernetlite-1.00.a", }, { .compatible = "xlnx,xps-ethernetlite-2.00.a", }, - { .compatible = "xlnx,xps-ethernetlite-2.00.b", }, { .compatible = "xlnx,xps-ethernetlite-2.01.a", }, { .compatible = "xlnx,xps-ethernetlite-3.00.a", }, { /* end of list */ }, From 16ee817e4365cdfe665ae8d6f6bc8f09befa1272 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 4 Nov 2014 17:08:05 +0100 Subject: [PATCH 047/138] stmmac: fix stmmac_tx_avail should be called with TX locked stmmac_tx_avail() may lie if used unprotected. It's using cur_tx and dirty_tx index. These index may be already in use by tx_clean when entering xmit routine. So, this should be called locked. This can cause transmit queue to be stuck, with following message: NETDEV WATCHDOG: eth0 (stmmaceth): transmit queue 0 timed out Signed-off-by: Fabrice Gasnier Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6f77a46c7e2c44..bcd8a341472286 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1894,7 +1894,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int nopaged_len = skb_headlen(skb); unsigned int enh_desc = priv->plat->enh_desc; + spin_lock(&priv->tx_lock); + if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) { + spin_unlock(&priv->tx_lock); if (!netif_queue_stopped(dev)) { netif_stop_queue(dev); /* This is a hard error, log it. */ @@ -1903,8 +1906,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - spin_lock(&priv->tx_lock); - if (priv->tx_path_in_lpi_mode) stmmac_disable_eee_mode(priv); From 758a0ab59b9bed75d8c8fcaed3cb41f10a586793 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 4 Nov 2014 17:08:06 +0100 Subject: [PATCH 048/138] stmmac: release tx lock, in case of dma mapping error. Add missing spin_unlock when tx frames gets dropped. Signed-off-by: Fabrice Gasnier Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bcd8a341472286..ee07e7eee6470d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2026,6 +2026,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; dma_map_err: + spin_unlock(&priv->tx_lock); dev_err(priv->device, "Tx dma map failed\n"); dev_kfree_skb(skb); priv->dev->stats.tx_dropped++; From b9d73704aab92602fcadff26f61462a6445bd0cf Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Tue, 4 Nov 2014 17:08:07 +0100 Subject: [PATCH 049/138] stmmac: fix lock in stmmac_set_rx_mode When compile with CONFIG_PROVE_LOCKING the following warnings happen: [snip] HARDIRQ-ON-W at: [] _raw_spin_lock+0x3c/0x4c [] stmmac_set_rx_mode+0x18/0x3c [] dev_set_rx_mode+0x1c/0x28 [] __dev_open+0xb4/0xf8 [] __dev_change_flags+0x94/0x128 [] dev_change_flags+0x10/0x48 [] ip_auto_config+0x1d4/0x1084 [] do_one_initcall+0x108/0x15c [] kernel_init_freeable+0x1a8/0x248 [] kernel_init+0x8/0x160 [] ret_from_fork+0x14/0x2c INITIAL USE at: [] _raw_spin_lock+0x3c/0x4c [] stmmac_set_rx_mode+0x18/0x3c [] dev_set_rx_mode+0x1c/0x28 [] __dev_open+0xb4/0xf8 [] __dev_change_flags+0x94/0x128 [] dev_change_flags+0x10/0x48 [] ip_auto_config+0x1d4/0x1084 [] do_one_initcall+0x108/0x15c [] kernel_init_freeable+0x1a8/0x248 [] kernel_init+0x8/0x160 [] ret_from_fork+0x14/0x2c so the patch just removes the lock protection in the stmmac_set_rx_mode Signed-off-by: Giuseppe Cavallaro Cc: Emilio Lopez Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ee07e7eee6470d..27598738c9cd66 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2283,9 +2283,7 @@ static void stmmac_set_rx_mode(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - spin_lock(&priv->lock); priv->hw->mac->set_filter(priv->hw, dev); - spin_unlock(&priv->lock); } /** From 4741cf9cecf8af57dc612dd96b0056fa8e2f301d Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Tue, 4 Nov 2014 17:08:08 +0100 Subject: [PATCH 050/138] stmmac: fix concurrency in eee initialization. This patch aims to fix the concurrency in eee initialization inside the stmmac driver and related warnings when enable DEBUG_ATOMIC_SLEEP. Prior this patch, the stmmac_eee_init could be called in several places as shown below: stmmac_open stmmac_resume PHY Layer | | | stmmac_hw_setup stmmac_adjust_link | | stmmac ethtool |__________________________|______________| | stmmac_eee_init The patch removes the stmmac_eee_init call inside the stmmac_hw_setup that is unnecessary. It is sufficient to call it in the adjust_link to always guarantee that EEE is always configured at mac level too. Fixing the lock protection now it is covered another case (not considered before). The stmmac_eee_init could be called by the ethtool so critical sections must be protected inside this function too. Signed-off-by: Giuseppe Cavallaro Cc: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 27598738c9cd66..9c79bf23d0b8bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -276,6 +276,7 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) bool stmmac_eee_init(struct stmmac_priv *priv) { char *phy_bus_name = priv->plat->phy_bus_name; + unsigned long flags; bool ret = false; /* Using PCS we cannot dial with the phy registers at this stage @@ -300,6 +301,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) * changed). * In that case the driver disable own timers. */ + spin_lock_irqsave(&priv->lock, flags); if (priv->eee_active) { pr_debug("stmmac: disable EEE\n"); del_timer_sync(&priv->eee_ctrl_timer); @@ -307,9 +309,11 @@ bool stmmac_eee_init(struct stmmac_priv *priv) tx_lpi_timer); } priv->eee_active = 0; + spin_unlock_irqrestore(&priv->lock, flags); goto out; } /* Activate the EEE and start timers */ + spin_lock_irqsave(&priv->lock, flags); if (!priv->eee_active) { priv->eee_active = 1; init_timer(&priv->eee_ctrl_timer); @@ -325,9 +329,10 @@ bool stmmac_eee_init(struct stmmac_priv *priv) /* Set HW EEE according to the speed */ priv->hw->mac->set_eee_pls(priv->hw, priv->phydev->link); - pr_debug("stmmac: Energy-Efficient Ethernet initialized\n"); - ret = true; + spin_unlock_irqrestore(&priv->lock, flags); + + pr_debug("stmmac: Energy-Efficient Ethernet initialized\n"); } out: return ret; @@ -760,12 +765,12 @@ static void stmmac_adjust_link(struct net_device *dev) if (new_state && netif_msg_link(priv)) phy_print_status(phydev); + spin_unlock_irqrestore(&priv->lock, flags); + /* At this stage, it could be needed to setup the EEE or adjust some * MAC related HW registers. */ priv->eee_enabled = stmmac_eee_init(priv); - - spin_unlock_irqrestore(&priv->lock, flags); } /** @@ -1705,8 +1710,6 @@ static int stmmac_hw_setup(struct net_device *dev) } priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; - priv->eee_enabled = stmmac_eee_init(priv); - stmmac_init_tx_coalesce(priv); if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) { From 777da230c5b98a6a3793f0525dd99e1e61c8a072 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Tue, 4 Nov 2014 17:08:09 +0100 Subject: [PATCH 051/138] stmmac: fix atomicity in pm routines This patch is to fix the atomicity when suspend and resume the driver. The clk api have been changed (as reported by Hao Liang) and the skb allocation is done out of the hw setup function and taking care about the GFP flags. Reported-by: Hao Liang Signed-off-by: Giuseppe Cavallaro Cc: Alexey Khoroshilov Cc: Hao Liang Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9c79bf23d0b8bb..18c46bb0f3bfa2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -964,12 +964,12 @@ static void stmmac_clear_descriptors(struct stmmac_priv *priv) } static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, - int i) + int i, gfp_t flags) { struct sk_buff *skb; skb = __netdev_alloc_skb(priv->dev, priv->dma_buf_sz + NET_IP_ALIGN, - GFP_KERNEL); + flags); if (!skb) { pr_err("%s: Rx init fails; skb is NULL\n", __func__); return -ENOMEM; @@ -1011,7 +1011,7 @@ static void stmmac_free_rx_buffers(struct stmmac_priv *priv, int i) * and allocates the socket buffers. It suppors the chained and ring * modes. */ -static int init_dma_desc_rings(struct net_device *dev) +static int init_dma_desc_rings(struct net_device *dev, gfp_t flags) { int i; struct stmmac_priv *priv = netdev_priv(dev); @@ -1046,7 +1046,7 @@ static int init_dma_desc_rings(struct net_device *dev) else p = priv->dma_rx + i; - ret = stmmac_init_rx_buffers(priv, p, i); + ret = stmmac_init_rx_buffers(priv, p, i, flags); if (ret) goto err_init_rx_buffers; @@ -1652,11 +1652,6 @@ static int stmmac_hw_setup(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); int ret; - ret = init_dma_desc_rings(dev); - if (ret < 0) { - pr_err("%s: DMA descriptors initialization failed\n", __func__); - return ret; - } /* DMA initialization and SW reset */ ret = stmmac_init_dma_engine(priv); if (ret < 0) { @@ -1710,8 +1705,6 @@ static int stmmac_hw_setup(struct net_device *dev) } priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; - stmmac_init_tx_coalesce(priv); - if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) { priv->rx_riwt = MAX_DMA_RIWT; priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT); @@ -1764,12 +1757,20 @@ static int stmmac_open(struct net_device *dev) goto dma_desc_error; } + ret = init_dma_desc_rings(dev, GFP_KERNEL); + if (ret < 0) { + pr_err("%s: DMA descriptors initialization failed\n", __func__); + goto init_error; + } + ret = stmmac_hw_setup(dev); if (ret < 0) { pr_err("%s: Hw setup failed\n", __func__); goto init_error; } + stmmac_init_tx_coalesce(priv); + if (priv->phydev) phy_start(priv->phydev); @@ -2953,7 +2954,7 @@ int stmmac_suspend(struct net_device *ndev) stmmac_set_mac(priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ - clk_disable_unprepare(priv->stmmac_clk); + clk_disable(priv->stmmac_clk); } spin_unlock_irqrestore(&priv->lock, flags); @@ -2985,7 +2986,7 @@ int stmmac_resume(struct net_device *ndev) } else { pinctrl_pm_select_default_state(priv->device); /* enable the clk prevously disabled */ - clk_prepare_enable(priv->stmmac_clk); + clk_enable(priv->stmmac_clk); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); @@ -2993,7 +2994,9 @@ int stmmac_resume(struct net_device *ndev) netif_device_attach(ndev); + init_dma_desc_rings(ndev, GFP_ATOMIC); stmmac_hw_setup(ndev); + stmmac_init_tx_coalesce(priv); napi_enable(&priv->napi); From 1f37bf87aa7523d28e7e4c4f7bb5dba98faa3e00 Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Tue, 4 Nov 2014 17:15:08 -0200 Subject: [PATCH 052/138] tcp: zero retrans_stamp if all retrans were acked Ueki Kohei reported that when we are using NewReno with connections that have a very low traffic, we may timeout the connection too early if a second loss occurs after the first one was successfully acked but no data was transfered later. Below is his description of it: When SACK is disabled, and a socket suffers multiple separate TCP retransmissions, that socket's ETIMEDOUT value is calculated from the time of the *first* retransmission instead of the *latest* retransmission. This happens because the tcp_sock's retrans_stamp is set once then never cleared. Take the following connection: Linux remote-machine | | send#1---->(*1)|--------> data#1 --------->| | | | RTO : : | | | ---(*2)|----> data#1(retrans) ---->| | (*3)|<---------- ACK <----------| | | | | : : | : : | : : 16 minutes (or more) : | : : | : : | : : | | | send#2---->(*4)|--------> data#2 --------->| | | | RTO : : | | | ---(*5)|----> data#2(retrans) ---->| | | | | | | RTO*2 : : | | | | | | ETIMEDOUT<----(*6)| | (*1) One data packet sent. (*2) Because no ACK packet is received, the packet is retransmitted. (*3) The ACK packet is received. The transmitted packet is acknowledged. At this point the first "retransmission event" has passed and been recovered from. Any future retransmission is a completely new "event". (*4) After 16 minutes (to correspond with retries2=15), a new data packet is sent. Note: No data is transmitted between (*3) and (*4). The socket's timeout SHOULD be calculated from this point in time, but instead it's calculated from the prior "event" 16 minutes ago. (*5) Because no ACK packet is received, the packet is retransmitted. (*6) At the time of the 2nd retransmission, the socket returns ETIMEDOUT. Therefore, now we clear retrans_stamp as soon as all data during the loss window is fully acked. Reported-by: Ueki Kohei Cc: Neal Cardwell Cc: Yuchung Cheng Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 60 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a12b455928e522..88fa2d1606859d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2315,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) /* Undo procedures. */ +/* We can clear retrans_stamp when there are no retransmissions in the + * window. It would seem that it is trivially available for us in + * tp->retrans_out, however, that kind of assumptions doesn't consider + * what will happen if errors occur when sending retransmission for the + * second time. ...It could the that such segment has only + * TCPCB_EVER_RETRANS set at the present time. It seems that checking + * the head skb is enough except for some reneging corner cases that + * are not worth the effort. + * + * Main reason for all this complexity is the fact that connection dying + * time now depends on the validity of the retrans_stamp, in particular, + * that successive retransmissions of a segment must not advance + * retrans_stamp under any conditions. + */ +static bool tcp_any_retrans_done(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (tp->retrans_out) + return true; + + skb = tcp_write_queue_head(sk); + if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) + return true; + + return false; +} + #if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, const char *msg) { @@ -2410,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ tcp_moderate_cwnd(tp); + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; return true; } tcp_set_ca_state(sk, TCP_CA_Open); @@ -2430,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk) return false; } -/* We can clear retrans_stamp when there are no retransmissions in the - * window. It would seem that it is trivially available for us in - * tp->retrans_out, however, that kind of assumptions doesn't consider - * what will happen if errors occur when sending retransmission for the - * second time. ...It could the that such segment has only - * TCPCB_EVER_RETRANS set at the present time. It seems that checking - * the head skb is enough except for some reneging corner cases that - * are not worth the effort. - * - * Main reason for all this complexity is the fact that connection dying - * time now depends on the validity of the retrans_stamp, in particular, - * that successive retransmissions of a segment must not advance - * retrans_stamp under any conditions. - */ -static bool tcp_any_retrans_done(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if (tp->retrans_out) - return true; - - skb = tcp_write_queue_head(sk); - if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) - return true; - - return false; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { From 66f1c44887ba4f47d617f8ae21cf8e04e1892bd7 Mon Sep 17 00:00:00 2001 From: Gregory Fong Date: Tue, 4 Nov 2014 11:21:21 -0800 Subject: [PATCH 053/138] bridge: include in6.h in if_bridge.h for struct in6_addr if_bridge.h uses struct in6_addr ip6, but wasn't including the in6.h header. Thomas Backlund originally sent a patch to do this, but this revealed a redefinition issue: https://lkml.org/lkml/2013/1/13/116 The redefinition issue should have been fixed by the following Linux commits: ee262ad827f89e2dc7851ec2986953b5b125c6bc inet: defines IPPROTO_* needed for module alias generation cfd280c91253cc28e4919e349fa7a813b63e71e8 net: sync some IP headers with glibc and the following glibc commit: 6c82a2f8d7c8e21e39237225c819f182ae438db3 Coordinate IPv6 definitions for Linux and glibc so actually include the header now. Reported-by: Colin Guthrie Reported-by: Christiaan Welvaart Reported-by: Thomas Backlund Cc: Florian Fainelli Cc: Cong Wang Cc: David Miller Signed-off-by: Gregory Fong Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 39f621a9fe826c..da17e456908d2d 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -15,6 +15,7 @@ #include #include +#include #define SYSFS_BRIDGE_ATTR "bridge" #define SYSFS_BRIDGE_FDB "brforward" From e4742b1e786ca386e88e6cfb2801e14e15e365cd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 6 Nov 2014 09:27:11 -0800 Subject: [PATCH 054/138] Input: synaptics - add min/max quirk for Lenovo T440s The new Lenovo T440s laptop has a different PnP ID "LEN0039", and it needs the similar min/max quirk to make its clickpad working. BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=903748 Reported-and-tested-by: Joschi Brauchle Cc: Signed-off-by: Takashi Iwai Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 9031a0a28ea4f2..2a7a9174c702a4 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -135,8 +135,8 @@ static const struct min_max_quirk min_max_pnpid_table[] = { 1232, 5710, 1156, 4696 }, { - (const char * const []){"LEN0034", "LEN0036", "LEN2002", - "LEN2004", NULL}, + (const char * const []){"LEN0034", "LEN0036", "LEN0039", + "LEN2002", "LEN2004", NULL}, 1024, 5112, 2024, 4832 }, { @@ -163,6 +163,7 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0036", /* T440 */ "LEN0037", "LEN0038", + "LEN0039", /* T440s */ "LEN0041", "LEN0042", /* Yoga */ "LEN0045", From 2c2a9cbd64387d6b70ac5db013e9bfe9412c7354 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Wed, 5 Nov 2014 15:32:59 +0100 Subject: [PATCH 055/138] net: mv643xx_eth: reclaim TX skbs only when released by the HW ATM, txq_reclaim will dequeue and free an skb for each tx desc released by the hw that has TX_LAST_DESC set. However, in case of TSO, each hw desc embedding the last part of a segment has TX_LAST_DESC set, losing the one-to-one 'last skb frag'/'TX_LAST_DESC set' correspondance, which causes data corruption. Fix this by checking TX_ENABLE_INTERRUPT instead of TX_LAST_DESC, and warn when trying to dequeue from an empty txq (which can be symptomatic of releasing skbs prematurely). Fixes: 3ae8f4e0b98 ('net: mv643xx_eth: Implement software TSO') Reported-by: Slawomir Gajzner Reported-by: Julien D'Ascenzio Signed-off-by: Karl Beldan Cc: Ian Campbell Cc: Eric Dumazet Cc: Ezequiel Garcia Cc: Sebastian Hesselbarth Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index b151a949f352a2..d44560d1d268ca 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1047,7 +1047,6 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force) int tx_index; struct tx_desc *desc; u32 cmd_sts; - struct sk_buff *skb; tx_index = txq->tx_used_desc; desc = &txq->tx_desc_area[tx_index]; @@ -1066,19 +1065,22 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force) reclaimed++; txq->tx_desc_count--; - skb = NULL; - if (cmd_sts & TX_LAST_DESC) - skb = __skb_dequeue(&txq->tx_skb); + if (!IS_TSO_HEADER(txq, desc->buf_ptr)) + dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr, + desc->byte_cnt, DMA_TO_DEVICE); + + if (cmd_sts & TX_ENABLE_INTERRUPT) { + struct sk_buff *skb = __skb_dequeue(&txq->tx_skb); + + if (!WARN_ON(!skb)) + dev_kfree_skb(skb); + } if (cmd_sts & ERROR_SUMMARY) { netdev_info(mp->dev, "tx error\n"); mp->dev->stats.tx_errors++; } - if (!IS_TSO_HEADER(txq, desc->buf_ptr)) - dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr, - desc->byte_cnt, DMA_TO_DEVICE); - dev_kfree_skb(skb); } __netif_tx_unlock_bh(nq); From 4484d0524e86dcb2fb5ad0805b09d3e74559ebd5 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Wed, 5 Nov 2014 18:33:31 +0530 Subject: [PATCH 056/138] drivers: net: cpsw: remove cpsw_ale_stop from cpsw_ale_destroy when cpsw is build as modulea and simple insert and removal of module creates a deadlock, due to delete timer. the timer is created and destroyed in cpsw_ale_start and cpsw_ale_stop which are from device open and close. root@am437x-evm:~# modprobe -r ti_cpsw [ 158.505333] INFO: trying to register non-static key. [ 158.510623] the code is fine but needs lockdep annotation. [ 158.516448] turning off the locking correctness validator. [ 158.522282] CPU: 0 PID: 1339 Comm: modprobe Not tainted 3.14.23-00445-gd41c88f #44 [ 158.530359] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 158.538603] [] (show_stack) from [] (dump_stack+0x78/0x94) [ 158.546295] [] (dump_stack) from [] (__lock_acquire+0x176c/0x1b74) [ 158.554711] [] (__lock_acquire) from [] (lock_acquire+0x9c/0x104) [ 158.563043] [] (lock_acquire) from [] (del_timer_sync+0x44/0xd8) [ 158.571289] [] (del_timer_sync) from [] (cpsw_ale_destroy+0x10/0x3c [ti_cpsw]) [ 158.580821] [] (cpsw_ale_destroy [ti_cpsw]) from [] (cpsw_remove+0x30/0xa0 [ti_cpsw]) [ 158.591000] [] (cpsw_remove [ti_cpsw]) from [] (platform_drv_remove+0x18/0x1c) [ 158.600527] [] (platform_drv_remove) from [] (__device_release_driver+0x70/0xc8) [ 158.610236] [] (__device_release_driver) from [] (driver_detach+0xb4/0xb8) [ 158.619386] [] (driver_detach) from [] (bus_remove_driver+0x4c/0x90) [ 158.627988] [] (bus_remove_driver) from [] (SyS_delete_module+0x10c/0x198) [ 158.637144] [] (SyS_delete_module) from [] (ret_fast_syscall+0x0/0x48) [ 179.524727] INFO: rcu_sched detected stalls on CPUs/tasks: {} (detected by 0, t=2102 jiffies, g=1487, c=1486, q=6) [ 179.535741] INFO: Stall ended before state dump start Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_ale.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 3ae83879a75f5e..097ebe7077ac0c 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -785,7 +785,6 @@ int cpsw_ale_destroy(struct cpsw_ale *ale) { if (!ale) return -EINVAL; - cpsw_ale_stop(ale); cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0); kfree(ale); return 0; From 5816c3dafb6c63fd5c7b9f3f707c8565811d9916 Mon Sep 17 00:00:00 2001 From: Ryo Munakata Date: Wed, 5 Nov 2014 23:45:58 +0900 Subject: [PATCH 057/138] net/9p: remove a comment about pref member which doesn't exist Signed-off-by: Ryo Munakata Signed-off-by: David S. Miller --- include/net/9p/transport.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index d9fa68f26c41c3..2a25dec3021166 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -34,7 +34,6 @@ * @list: used to maintain a list of currently available transports * @name: the human-readable name of the transport * @maxsize: transport provided maximum packet size - * @pref: Preferences of this transport * @def: set if this transport should be considered the default * @create: member function to create a new connection on this transport * @close: member function to discard a connection on this transport From b31f65fb4383a49bdcfa465176754b37e44e1e17 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 5 Nov 2014 19:47:28 +0100 Subject: [PATCH 058/138] net: dsa: slave: Fix autoneg for phys on switch MDIO bus When the ports phys are connected to the switches internal MDIO bus, we need to connect the phy to the slave netdev, otherwise auto-negotiation etc, does not work. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6d1817449c3675..ab03e00ffe8f0e 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -489,11 +489,14 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p, /* We could not connect to a designated PHY, so use the switch internal * MDIO bus instead */ - if (!p->phy) + if (!p->phy) { p->phy = ds->slave_mii_bus->phy_map[p->port]; - else + phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); + } else { pr_info("attached PHY at address %d [%s]\n", p->phy->addr, p->phy->drv->name); + } } int dsa_slave_suspend(struct net_device *slave_dev) From a158906dd7d4379e85ec371a14edfe1ce5f2318d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 6 Nov 2014 12:51:21 +0200 Subject: [PATCH 059/138] net/mlx5_core: Fix race in create EQ After the EQ is created, it can possibly generate interrupts and the interrupt handler is referencing eq->dev. It is therefore required to set eq->dev before calling request_irq() so if an event is generated before request_irq() returns, we will have a valid eq->dev field. Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index a278238a2db643..ad2c96a02a5352 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -374,15 +374,14 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); eq->eqn = out.eq_number; + eq->irqn = vecidx; + eq->dev = dev; + eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, eq->name, eq); if (err) goto err_eq; - eq->irqn = vecidx; - eq->dev = dev; - eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = mlx5_debug_eq_add(dev, eq); if (err) goto err_irq; From 364d1798efdf13c7f2b9d902228adf8e84f1d963 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 6 Nov 2014 12:51:22 +0200 Subject: [PATCH 060/138] net/mlx5_core: Fix race on driver load When events arrive at driver load, the event handler gets called even before the spinlock and list are initialized. Fix this by moving the initialization before EQs creation. Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3d8e8e489b2ddb..71b10b210792e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -864,14 +864,14 @@ static int init_one(struct pci_dev *pdev, dev->profile = &profile[prof_sel]; dev->event = mlx5_core_event; + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); err = mlx5_dev_init(dev, pdev); if (err) { dev_err(&pdev->dev, "mlx5_dev_init failed %d\n", err); goto out; } - INIT_LIST_HEAD(&priv->ctx_list); - spin_lock_init(&priv->ctx_lock); err = mlx5_register_device(dev); if (err) { dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); From 44aa91ab2bb862540daa81403a1bc507496260fe Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 6 Nov 2014 15:21:38 +0530 Subject: [PATCH 061/138] enic: handle error condition properly in enic_rq_indicate_buf In case of error in rx path, we free the buf->os_buf but we do not make it NULL. In next iteration we use the skb which is already freed. This causes the following crash. [ 886.154772] general protection fault: 0000 [#1] PREEMPT SMP [ 886.154851] Modules linked in: rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 microcode evdev cirrus ttm drm_kms_helper drm enic syscopyarea sysfillrect sysimgblt psmouse i2c_piix4 serio_raw pcspkr i2c_core nfs lockd grace sunrpc fscache ext4 crc16 mbcache jbd2 sd_mod crc_t10dif crct10dif_common ata_generic ata_piix virtio_balloon libata scsi_mod uhci_hcd usbcore virtio_pci virtio_ring virtio usb_common [ 886.155199] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 3.17.0-netnext-05668-g876bc7f #272 [ 886.155263] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 886.155304] task: ffffffff81a1d580 ti: ffffffff81a00000 task.ti: ffffffff81a00000 [ 886.155356] RIP: 0010:[] [] kfree_skb_list+0x10/0x30 [ 886.155418] RSP: 0018:ffff880210603d48 EFLAGS: 00010206 [ 886.155456] RAX: 0000000000000020 RBX: 0000000000000000 RCX: 0000000000000000 [ 886.155504] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 004500084e000017 [ 886.155553] RBP: ffff880210603d50 R08: 00000000fe13d1b6 R09: 0000000000000001 [ 886.155601] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880209ff2f00 [ 886.155650] R13: ffff88020ac0fe40 R14: ffff880209ff2f00 R15: ffff8800da8e3a80 [ 886.155699] FS: 0000000000000000(0000) GS:ffff880210600000(0000) knlGS:0000000000000000 [ 886.155774] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 886.155814] CR2: 00007f0e0c925000 CR3: 0000000035e8b000 CR4: 00000000000006f0 [ 886.155865] Stack: [ 886.155882] 0000000000000000 ffff880210603d78 ffffffff81383f79 ffff880209ff2f00 [ 886.155942] ffff88020b0c0b40 000000000000c000 ffff880210603d90 ffffffff81383faf [ 886.156001] ffff880209ff2f00 ffff880210603da8 ffffffff8138406d ffff88020b1b08c0 [ 886.156061] Call Trace: [ 886.156080] [ 886.156095] [ 886.156112] [] skb_release_data+0xa9/0xc0 [ 886.157656] [] skb_release_all+0x1f/0x30 [ 886.159195] [] consume_skb+0x1d/0x40 [ 886.160719] [] __dev_kfree_skb_any+0x35/0x40 [ 886.162224] [] enic_rq_service.constprop.47+0xe5/0x5a0 [enic] [ 886.163756] [] enic_poll_msix_rq+0x199/0x370 [enic] [ 886.164730] [] net_rx_action+0x139/0x210 [ 886.164730] [] __do_softirq+0x14e/0x280 [ 886.164730] [] irq_exit+0x8e/0xb0 [ 886.164730] [] do_IRQ+0x5d/0x100 [ 886.164730] [] common_interrupt+0x72/0x72 fixes: a03bb56e67c357980dae886683733dab5583dc14 ("enic: implement rx_copybreak") Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 180e53fa628fac..cd254d1ebd1669 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1037,7 +1037,10 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, enic->rq_truncated_pkts++; } + pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, + PCI_DMA_FROMDEVICE); dev_kfree_skb_any(skb); + buf->os_buf = NULL; return; } @@ -1088,7 +1091,10 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, /* Buffer overflow */ + pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, + PCI_DMA_FROMDEVICE); dev_kfree_skb_any(skb); + buf->os_buf = NULL; } } From f6b7734ba7b9c8c85c897c7b4b4e526bd5d7c978 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 6 Nov 2014 15:21:39 +0530 Subject: [PATCH 062/138] enic: update desc properly in rx_copybreak When we reuse the rx buffer, we need to update the desc. If not hardware sees stale value. In the following crash, when mtu is changed, hardware sees old rx buffer value and crashes on skb_put. Fix this by using enic_queue_rq_desc helper function which updates the necessary desc. [ 64.657376] skbuff: skb_over_panic: text:ffffffffa041f55d len:9010 put:9010 head:ffff8800d3ca9fc0 data:ffff8800d3caa000 tail:0x2372 end:0x640 dev:enp0s3 [ 64.659965] ------------[ cut here ]------------ [ 64.661322] kernel BUG at net/core/skbuff.c:100! [ 64.662644] invalid opcode: 0000 [#1] PREEMPT SMP [ 64.664001] Modules linked in: rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 cirrus ttm drm_kms_helper drm enic psmouse microcode evdev serio_raw syscopyarea sysfillrect sysimgblt i2c_piix4 i2c_core pcspkr nfs lockd grace sunrpc fscache ext4 crc16 mbcache jbd2 sd_mod ata_generic virtio_balloon ata_piix libata uhci_hcd virtio_pci virtio_ring usbcore usb_common virtio scsi_mod [ 64.664834] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 3.17.0-netnext-10335-g942396b-dirty #273 [ 64.664834] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 64.664834] task: ffffffff81a1d580 ti: ffffffff81a00000 task.ti: ffffffff81a00000 [ 64.664834] RIP: 0010:[] [] skb_panic+0x61/0x70 [ 64.664834] RSP: 0018:ffff880210603d48 EFLAGS: 00010292 [ 64.664834] RAX: 000000000000008c RBX: ffff88020b0f6930 RCX: 0000000000000000 [ 64.664834] RDX: 000000000000008c RSI: ffffffff8178b288 RDI: 00000000ffffffff [ 64.664834] RBP: ffff880210603d68 R08: 0000000000000001 R09: 0000000000000001 [ 64.664834] R10: 00000000000005ce R11: 0000000000000001 R12: ffff88020b1f0b40 [ 64.664834] R13: 000000000000a332 R14: ffff880209a1a000 R15: 0000000000000001 [ 64.664834] FS: 0000000000000000(0000) GS:ffff880210600000(0000) knlGS:0000000000000000 [ 64.664834] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 64.664834] CR2: 00007f6752935e48 CR3: 0000000035743000 CR4: 00000000000006f0 [ 64.664834] Stack: [ 64.664834] ffff8800d3caa000 0000000000002372 0000000000000640 ffff88020b1f0000 [ 64.664834] ffff880210603d78 ffffffff81392d54 ffff880210603e08 ffffffffa041f55d [ 64.664834] 0000000000000296 ffffffff00000000 00008e7e00008e7e ffff880200002332 [ 64.664834] Call Trace: [ 64.664834] [ 64.664834] [ 64.664834] [] skb_put+0x54/0x60 [ 64.664834] [] enic_rq_service.constprop.47+0x3ad/0x730 [enic] [ 64.664834] [] enic_poll_msix_rq+0x199/0x370 [enic] [ 64.664834] [] net_rx_action+0x139/0x210 [ 64.664834] [] ? __this_cpu_preempt_check+0x13/0x20 [ 64.664834] [] __do_softirq+0x14e/0x280 [ 64.664834] [] irq_exit+0x8e/0xb0 [ 64.664834] [] do_IRQ+0x61/0x100 [ 64.664834] [] common_interrupt+0x72/0x72 fixes: a03bb56e67c357980dae886683733dab5583dc14 ("enic: implement rx_copybreak") Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index cd254d1ebd1669..73cf1653a4a3d4 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -940,18 +940,8 @@ static int enic_rq_alloc_buf(struct vnic_rq *rq) struct vnic_rq_buf *buf = rq->to_use; if (buf->os_buf) { - buf = buf->next; - rq->to_use = buf; - rq->ring.desc_avail--; - if ((buf->index & VNIC_RQ_RETURN_RATE) == 0) { - /* Adding write memory barrier prevents compiler and/or - * CPU reordering, thus avoiding descriptor posting - * before descriptor is initialized. Otherwise, hardware - * can read stale descriptor fields. - */ - wmb(); - iowrite32(buf->index, &rq->ctrl->posted_index); - } + enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr, + buf->len); return 0; } From 9d01412ae76fec5274a3d94a28a3552a742a60dc Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 6 Nov 2014 07:58:51 -0500 Subject: [PATCH 063/138] netxen: Fix link event handling. o Poll for the link events only if firmware doesn't have capability to notify the driver for the link events. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 0b2a1ccd276dbd..613037584d08e7 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -2762,7 +2762,8 @@ netxen_fw_poll_work(struct work_struct *work) if (test_bit(__NX_RESETTING, &adapter->state)) goto reschedule; - if (test_bit(__NX_DEV_UP, &adapter->state)) { + if (test_bit(__NX_DEV_UP, &adapter->state) && + !(adapter->capabilities & NX_FW_CAPABILITY_LINK_NOTIFICATION)) { if (!adapter->has_link_events) { netxen_nic_handle_phy_intr(adapter); From 1310b544e5708ab5e4de46e9c70f54b9fd8350b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lothar=20Wa=C3=9Fmann?= Date: Fri, 7 Nov 2014 10:02:47 +0100 Subject: [PATCH 064/138] net: fec: fix regression on i.MX28 introduced by rx_copybreak support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1b7bde6d659d ("net: fec: implement rx_copybreak to improve rx performance") introduced a regression for i.MX28. The swap_buffer() function doing the endian conversion of the received data on i.MX28 may access memory beyond the actual packet size in the DMA buffer. fec_enet_copybreak() does not copy those bytes, so that the last bytes of a packet may be filled with invalid data after swapping. This will likely lead to checksum errors on received packets. E.g. when trying to mount an NFS rootfs: UDP: bad checksum. From 192.168.1.225:111 to 192.168.100.73:44662 ulen 36 Do the byte swapping and copying to the new skb in one go if necessary. Signed-off-by: Lothar Waßmann Tested-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c27128de8dde5c..3dca494797bd2e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -298,6 +298,16 @@ static void *swap_buffer(void *bufaddr, int len) return bufaddr; } +static void swap_buffer2(void *dst_buf, void *src_buf, int len) +{ + int i; + unsigned int *src = src_buf; + unsigned int *dst = dst_buf; + + for (i = 0; i < len; i += 4, src++, dst++) + *dst = swab32p(src); +} + static void fec_dump(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); @@ -1307,7 +1317,7 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff } static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb, - struct bufdesc *bdp, u32 length) + struct bufdesc *bdp, u32 length, bool swap) { struct fec_enet_private *fep = netdev_priv(ndev); struct sk_buff *new_skb; @@ -1322,7 +1332,10 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb, dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr, FEC_ENET_RX_FRSIZE - fep->rx_align, DMA_FROM_DEVICE); - memcpy(new_skb->data, (*skb)->data, length); + if (!swap) + memcpy(new_skb->data, (*skb)->data, length); + else + swap_buffer2(new_skb->data, (*skb)->data, length); *skb = new_skb; return true; @@ -1352,6 +1365,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) u16 vlan_tag; int index = 0; bool is_copybreak; + bool need_swap = id_entry->driver_data & FEC_QUIRK_SWAP_FRAME; #ifdef CONFIG_M532x flush_cache_all(); @@ -1415,7 +1429,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) * include that when passing upstream as it messes up * bridging applications. */ - is_copybreak = fec_enet_copybreak(ndev, &skb, bdp, pkt_len - 4); + is_copybreak = fec_enet_copybreak(ndev, &skb, bdp, pkt_len - 4, + need_swap); if (!is_copybreak) { skb_new = netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE); if (unlikely(!skb_new)) { @@ -1430,7 +1445,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) prefetch(skb->data - NET_IP_ALIGN); skb_put(skb, pkt_len - 4); data = skb->data; - if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) + if (!is_copybreak && need_swap) swap_buffer(data, pkt_len); /* Extract the enhanced buffer descriptor */ From a4c724d0723b078e4ab4670e557cda1795036a7a Mon Sep 17 00:00:00 2001 From: Giedrius Statkevicius Date: Thu, 30 Oct 2014 18:57:47 +0200 Subject: [PATCH 065/138] platform: hp_accel: add a i8042 filter to remove HPQ6000 data from kb bus stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a i8042 filter to hp_accel to remove accelerometer's data with acpi id HPQ6000 from keyboard bus stream. The codes sent by accelerometer are e0 25, e0 26, e0 27 and e0 28. The relevant information is already passed through /dev/freefall so no need to send these undocumented weird signals through the keyboard bus. Also, unclogs `dmesg` because atkbd complained about weird scan codes, saves processing power and disk space. Signed-off-by: Giedrius Statkevičius Acked-by: Dmitry Torokhov Reviewed-by: Éric Piel Signed-off-by: Darren Hart --- drivers/platform/x86/hp_accel.c | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 13e14ec1d3d711..6bec745b6b92dc 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include "../../misc/lis3lv02d/lis3lv02d.h" #define DRIVER_NAME "hp_accel" @@ -73,6 +75,13 @@ static inline void delayed_sysfs_set(struct led_classdev *led_cdev, /* HP-specific accelerometer driver ------------------------------------ */ +/* e0 25, e0 26, e0 27, e0 28 are scan codes that the accelerometer with acpi id + * HPQ6000 sends through the keyboard bus */ +#define ACCEL_1 0x25 +#define ACCEL_2 0x26 +#define ACCEL_3 0x27 +#define ACCEL_4 0x28 + /* For automatic insertion of the module */ static const struct acpi_device_id lis3lv02d_device_ids[] = { {"HPQ0004", 0}, /* HP Mobile Data Protection System PNP */ @@ -294,6 +303,35 @@ static void lis3lv02d_enum_resources(struct acpi_device *device) printk(KERN_DEBUG DRIVER_NAME ": Error getting resources\n"); } +static bool hp_accel_i8042_filter(unsigned char data, unsigned char str, + struct serio *port) +{ + static bool extended; + + if (str & I8042_STR_AUXDATA) + return false; + + if (data == 0xe0) { + extended = true; + return true; + } else if (unlikely(extended)) { + extended = false; + + switch (data) { + case ACCEL_1: + case ACCEL_2: + case ACCEL_3: + case ACCEL_4: + return true; + default: + serio_interrupt(port, 0xe0, 0); + return false; + } + } + + return false; +} + static int lis3lv02d_add(struct acpi_device *device) { int ret; @@ -326,6 +364,11 @@ static int lis3lv02d_add(struct acpi_device *device) if (ret) return ret; + /* filter to remove HPQ6000 accelerometer data + * from keyboard bus stream */ + if (strstr(dev_name(&device->dev), "HPQ6000")) + i8042_install_filter(hp_accel_i8042_filter); + INIT_WORK(&hpled_led.work, delayed_set_status_worker); ret = led_classdev_register(NULL, &hpled_led.led_classdev); if (ret) { @@ -343,6 +386,7 @@ static int lis3lv02d_remove(struct acpi_device *device) if (!device) return -EINVAL; + i8042_remove_filter(hp_accel_i8042_filter); lis3lv02d_joystick_disable(&lis3_dev); lis3lv02d_poweroff(&lis3_dev); From 436c2a5036b6ffe813310df2cf327d3b69be0734 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 6 Nov 2014 15:49:41 +0000 Subject: [PATCH 066/138] asix: Do full reset during ax88772_bind commit 3cc81d85ee01 ("asix: Don't reset PHY on if_up for ASIX 88772") causes the ethernet on Arndale to no longer function. This appears to be because the Arndale ethernet requires a full reset before it will function correctly, however simply reverting the above patch causes problems with ethtool settings getting reset. It seems the problem is that the ethernet is not properly reset during bind, and indeed the code in ax88772_bind that resets the device is a very small subset of the actual ax88772_reset function. This patch uses ax88772_reset in place of the existing reset code in ax88772_bind which removes some code duplication and fixes the ethernet on Arndale. It is still possible that the original patch causes some issues with suspend and resume but that seems like a separate issue and I haven't had a chance to test that yet. Signed-off-by: Charles Keepax Tested-by: Riku Voipio Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 2c05f6cdb12f3a..816d511e34d330 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -465,19 +465,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) return ret; } - ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL); - if (ret < 0) - return ret; - - msleep(150); - - ret = asix_sw_reset(dev, AX_SWRESET_CLEAR); - if (ret < 0) - return ret; - - msleep(150); - - ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_PRTE); + ax88772_reset(dev); /* Read PHYID register *AFTER* the PHY was reset properly */ phyid = asix_get_phyid(dev); From 09712f557b31838092e1f22a5f2dd131a843a3de Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 Nov 2014 17:05:25 +0100 Subject: [PATCH 067/138] cpufreq: Avoid crash in resume on SMP without OPP When resuming from s2ram on an SMP system without cpufreq operating points (e.g. there's no "operating-points" property for the CPU node in DT, or the platform doesn't use DT yet), the kernel crashes when bringing CPU 1 online: Enabling non-boot CPUs ... CPU1: Booted secondary processor Unable to handle kernel NULL pointer dereference at virtual address 0000003c pgd = ee5e6b00 [0000003c] *pgd=6e579003, *pmd=6e588003, *pte=00000000 Internal error: Oops: a07 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1246 Comm: s2ram Tainted: G W 3.18.0-rc3-koelsch-01614-g0377af242bb175c8-dirty #589 task: eeec5240 ti: ee704000 task.ti: ee704000 PC is at __cpufreq_add_dev.isra.24+0x24c/0x77c LR is at __cpufreq_add_dev.isra.24+0x244/0x77c pc : [] lr : [] psr: 60000153 sp : ee705d48 ip : ee705d48 fp : ee705d84 r10: c04e0450 r9 : 00000000 r8 : 00000001 r7 : c05426a8 r6 : 00000001 r5 : 00000001 r4 : 00000000 r3 : 00000000 r2 : 00000000 r1 : 20000153 r0 : c0542734 Verify that policy is not NULL before dereferencing it to fix this. Signed-off-by: Geert Uytterhoeven Fixes: 8414809c6a1e (cpufreq: Preserve policy structure across suspend/resume) Cc: 3.12+ # 3.12+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 644b54e1e7d13e..4473eba1d6b0b6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1022,7 +1022,8 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) read_unlock_irqrestore(&cpufreq_driver_lock, flags); - policy->governor = NULL; + if (policy) + policy->governor = NULL; return policy; } From c16561e8df7a64764ef61f02221e98273add325a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 6 Nov 2014 00:37:08 +0100 Subject: [PATCH 068/138] PM / Domains: Change prototype for the attach and detach callbacks Convert the prototypes to return an int in order to support error handling in these callbacks. Also, as suggested by Dmitry Torokhov, pass the domain pointer for use inside the callbacks, and so that they match the existing power_on/power_off callbacks which currently take the domain pointer. Acked-by: Dmitry Torokhov Acked-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson [ khilman: added domain as parameter to callbacks, as suggested by Dmitry ] Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 ++-- include/linux/pm_domain.h | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 40bc2f4072cc28..b520687046d4f3 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1437,7 +1437,7 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, spin_unlock_irq(&dev->power.lock); if (genpd->attach_dev) - genpd->attach_dev(dev); + genpd->attach_dev(genpd, dev); mutex_lock(&gpd_data->lock); gpd_data->base.dev = dev; @@ -1499,7 +1499,7 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, genpd->max_off_time_changed = true; if (genpd->detach_dev) - genpd->detach_dev(dev); + genpd->detach_dev(genpd, dev); spin_lock_irq(&dev->power.lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 73e938b7e9374c..b3ed7766a29196 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -72,8 +72,10 @@ struct generic_pm_domain { bool max_off_time_changed; bool cached_power_down_ok; struct gpd_cpuidle_data *cpuidle_data; - void (*attach_dev)(struct device *dev); - void (*detach_dev)(struct device *dev); + int (*attach_dev)(struct generic_pm_domain *domain, + struct device *dev); + void (*detach_dev)(struct generic_pm_domain *domain, + struct device *dev); }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) From c42bfd7f6cd26e8f712fc184460e32845d928d17 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 7 Nov 2014 15:46:56 -0800 Subject: [PATCH 069/138] Input: twl4030-pwrbutton - ensure a wakeup event is recorded. This button is treated as a wakeup source, so we need to initialise it correctly. Without the device_init_wakeup() call, dev->power.wakeup will be NULL, and pm_wakeup_event() will do nothing. Signed-off-by: NeilBrown Signed-off-by: Dmitry Torokhov --- drivers/input/misc/twl4030-pwrbutton.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/misc/twl4030-pwrbutton.c b/drivers/input/misc/twl4030-pwrbutton.c index fb3b63b2f85c36..8400a1a34d8756 100644 --- a/drivers/input/misc/twl4030-pwrbutton.c +++ b/drivers/input/misc/twl4030-pwrbutton.c @@ -85,6 +85,7 @@ static int twl4030_pwrbutton_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, pwr); + device_init_wakeup(&pdev->dev, true); return 0; } From caeb0d37fa3e387eb0dd22e5d497523c002033d1 Mon Sep 17 00:00:00 2001 From: Ulrik De Bie Date: Fri, 7 Nov 2014 23:51:34 -0800 Subject: [PATCH 070/138] Input: elantech - use elantech_report_trackpoint for hardware v4 too The Fujitsu H730 has hardware v4 with a trackpoint. This enables the elantech_report_trackpoint for v4. Reported-by: Stefan Valouch Tested-by: Stefan Valouch Tested-by: Alfredo Gemma Signed-off-by: Ulrik De Bie Acked-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 06fc6e76ffbe0c..53ddff91ab2e3c 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -792,6 +792,9 @@ static int elantech_packet_check_v4(struct psmouse *psmouse) unsigned char packet_type = packet[3] & 0x03; bool sanity_check; + if ((packet[3] & 0x0f) == 0x06) + return PACKET_TRACKPOINT; + /* * Sanity check based on the constant bits of a packet. * The constant bits change depending on the value of @@ -877,10 +880,19 @@ static psmouse_ret_t elantech_process_byte(struct psmouse *psmouse) case 4: packet_type = elantech_packet_check_v4(psmouse); - if (packet_type == PACKET_UNKNOWN) + switch (packet_type) { + case PACKET_UNKNOWN: return PSMOUSE_BAD_DATA; - elantech_report_absolute_v4(psmouse, packet_type); + case PACKET_TRACKPOINT: + elantech_report_trackpoint(psmouse, packet_type); + break; + + default: + elantech_report_absolute_v4(psmouse, packet_type); + break; + } + break; } From 0dc1587905a50f8f61bbc29e850aa592821e4bea Mon Sep 17 00:00:00 2001 From: Ulrik De Bie Date: Fri, 7 Nov 2014 23:57:34 -0800 Subject: [PATCH 071/138] Input: elantech - fix crc_enabled for Fujitsu H730 The Fujitsu H730 does not work with crc_enabled = 0, even though the crc_enabled bit in the firmware version indicated it would. When switching this value to crc_enabled to 1, the touchpad works. This patch uses DMI to detect H730. Reported-by: Stefan Valouch Tested-by: Stefan Valouch Tested-by: Alfredo Gemma Signed-off-by: Ulrik De Bie Acked-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 53ddff91ab2e3c..ce699eba9adc09 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1450,6 +1450,22 @@ static int elantech_reconnect(struct psmouse *psmouse) return 0; } +/* + * Some hw_version 4 models do not work with crc_disabled + */ +static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Fujitsu H730 does not work with crc_enabled == 0 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H730"), + }, + }, +#endif + { } +}; + /* * Some hw_version 3 models go into error state when we try to set * bit 3 and/or bit 1 of r10. @@ -1525,7 +1541,8 @@ static int elantech_set_properties(struct elantech_data *etd) * The signatures of v3 and v4 packets change depending on the * value of this hardware flag. */ - etd->crc_enabled = ((etd->fw_version & 0x4000) == 0x4000); + etd->crc_enabled = (etd->fw_version & 0x4000) == 0x4000 || + dmi_check_system(elantech_dmi_force_crc_enabled); /* Enable real hardware resolution on hw_version 3 ? */ etd->set_hw_resolution = !dmi_check_system(no_hw_res_dmi_table); From b4565913460cbd7d86c6bd52913dfaa07fa384aa Mon Sep 17 00:00:00 2001 From: Devin Ryles Date: Fri, 7 Nov 2014 18:02:47 -0500 Subject: [PATCH 072/138] ALSA: hda_intel: Add DeviceIDs for Sunrise Point-LP This patch adds DeviceIDs for Sunrise Point-LP Signed-off-by: Devin Ryles Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 9ab1e631cb3224..16660f312043a7 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -219,6 +219,7 @@ MODULE_SUPPORTED_DEVICE("{{Intel, ICH6}," "{Intel, LPT_LP}," "{Intel, WPT_LP}," "{Intel, SPT}," + "{Intel, SPT_LP}," "{Intel, HPT}," "{Intel, PBG}," "{Intel, SCH}," @@ -2004,6 +2005,9 @@ static const struct pci_device_id azx_ids[] = { /* Sunrise Point */ { PCI_DEVICE(0x8086, 0xa170), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + /* Sunrise Point-LP */ + { PCI_DEVICE(0x8086, 0x9d70), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Haswell */ { PCI_DEVICE(0x8086, 0x0a0c), .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL }, From 403b9636fe9f59124d1a437a297b330729061252 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sat, 8 Nov 2014 19:17:13 +0300 Subject: [PATCH 073/138] PM / sleep: Fix entering suspend-to-IDLE if no freeze_oops is set If no freeze_ops is set, trying to enter suspend-to-IDLE will cause a nice oops in platform_suspend_prepare_late(). Add respective checks to platform_suspend_prepare_late() and platform_resume_early() functions. Fixes: a8d46b9e4e48 (ACPI / sleep: Rework the handling of ACPI GPE wakeup ...) Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4ca9a33ff62020..c347e3ce3a55df 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -146,7 +146,7 @@ static int platform_suspend_prepare(suspend_state_t state) static int platform_suspend_prepare_late(suspend_state_t state) { - return state == PM_SUSPEND_FREEZE && freeze_ops->prepare ? + return state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->prepare ? freeze_ops->prepare() : 0; } @@ -164,7 +164,7 @@ static void platform_resume_noirq(suspend_state_t state) static void platform_resume_early(suspend_state_t state) { - if (state == PM_SUSPEND_FREEZE && freeze_ops->restore) + if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->restore) freeze_ops->restore(); } From 4ab8f7f320f91f279c3f06a9795cfea5c972888a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 8 Nov 2014 12:45:23 -0800 Subject: [PATCH 074/138] Input: alps - ignore potential bare packets when device is out of sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5th and 6th byte of ALPS trackstick V3 protocol match condition for first byte of PS/2 3 bytes packet. When driver enters out of sync state and ALPS trackstick is sending data then driver match 5th, 6th and next 1st bytes as PS/2. It basically means if user is using trackstick when driver is in out of sync state driver will never resync. Processing these bytes as 3 bytes PS/2 data cause total mess (random cursor movements, random clicks) and make trackstick unusable until psmouse driver decide to do full device reset. Lot of users reported problems with ALPS devices on Dell Latitude E6440, E6540 and E7440 laptops. ALPS device or Dell EC for unknown reason send some invalid ALPS PS/2 bytes which cause driver out of sync. It looks like that i8042 and psmouse/alps driver always receive group of 6 bytes packets so there are no missing bytes and no bytes were inserted between valid ones. This patch does not fix root of problem with ALPS devices found in Dell Latitude laptops but it does not allow to process some (invalid) subsequence of 6 bytes ALPS packets as 3 bytes PS/2 when driver is out of sync. So with this patch trackstick input device does not report bogus data when also driver is out of sync, so trackstick should be usable on those machines. Signed-off-by: Pali Rohár Tested-by: Pali Rohár Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 2b0ae8cc8e51bc..da18c977e7b83f 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1156,7 +1156,13 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) { struct alps_data *priv = psmouse->private; - if ((psmouse->packet[0] & 0xc8) == 0x08) { /* PS/2 packet */ + /* + * Check if we are dealing with a bare PS/2 packet, presumably from + * a device connected to the external PS/2 port. Because bare PS/2 + * protocol does not have enough constant bits to self-synchronize + * properly we only do this if the device is fully synchronized. + */ + if (!psmouse->out_of_sync_cnt && (psmouse->packet[0] & 0xc8) == 0x08) { if (psmouse->pktcnt == 3) { alps_report_bare_ps2_packet(psmouse, psmouse->packet, true); From 9d720b34c0a432639252f63012e18b0507f5b432 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 8 Nov 2014 12:58:57 -0800 Subject: [PATCH 075/138] Input: alps - allow up to 2 invalid packets without resetting device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some Dell Latitude laptops ALPS device or Dell EC send one invalid byte in 6 bytes ALPS packet. In this case psmouse driver enter out of sync state. It looks like that all other bytes in packets are valid and also device working properly. So there is no need to do full device reset, just need to wait for byte which match condition for first byte (start of packet). Because ALPS packets are bigger (6 or 8 bytes) default limit is small. This patch increase number of invalid bytes to size of 2 ALPS packets which psmouse driver can drop before do full reset. Resetting ALPS devices take some time and when doing reset on some Dell laptops touchpad, trackstick and also keyboard do not respond. So it is better to do it only if really necessary. Signed-off-by: Pali Rohár Tested-by: Pali Rohár Reviewed-by: Hans de Goede Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index da18c977e7b83f..433638e7e4af56 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -2395,6 +2395,9 @@ int alps_init(struct psmouse *psmouse) /* We are having trouble resyncing ALPS touchpads so disable it for now */ psmouse->resync_time = 0; + /* Allow 2 invalid packets without resetting device */ + psmouse->resetafter = psmouse->pktsize * 2; + return 0; init_fail: From 65f6ecc93e7cca888a96a68cf6b5292dff1982b6 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 7 Nov 2014 17:06:29 +0530 Subject: [PATCH 076/138] cxgb4vf: Move fl_starv_thres into adapter->sge data structure Move fl_starv_thres into adapter->sge data structure since it _could_ be different from adapter to adapter. Also move other per-adapter SGE values which had been treated as driver globals into adapter->sge. Based on original work by Casey Leedom Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4vf/adapter.h | 8 ++ drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 93 +++++++++++-------- 2 files changed, 61 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 68eaa9c88c7d8a..3d06e77d712151 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -299,6 +299,14 @@ struct sge { u16 timer_val[SGE_NTIMERS]; /* interrupt holdoff timer array */ u8 counter_val[SGE_NCOUNTERS]; /* interrupt RX threshold array */ + /* Decoded Adapter Parameters. + */ + u32 fl_pg_order; /* large page allocation size */ + u32 stat_len; /* length of status page at ring end */ + u32 pktshift; /* padding between CPL & packet data */ + u32 fl_align; /* response queue message alignment */ + u32 fl_starve_thres; /* Free List starvation threshold */ + /* * Reverse maps from Absolute Queue IDs to associated queue pointers. * The absolute Queue IDs are in a compact range which start at a diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 85036e6b42c4cd..a18830d8aa6d10 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -50,14 +50,6 @@ #include "../cxgb4/t4fw_api.h" #include "../cxgb4/t4_msg.h" -/* - * Decoded Adapter Parameters. - */ -static u32 FL_PG_ORDER; /* large page allocation size */ -static u32 STAT_LEN; /* length of status page at ring end */ -static u32 PKTSHIFT; /* padding between CPL and packet data */ -static u32 FL_ALIGN; /* response queue message alignment */ - /* * Constants ... */ @@ -264,15 +256,19 @@ static inline unsigned int fl_cap(const struct sge_fl *fl) /** * fl_starving - return whether a Free List is starving. + * @adapter: pointer to the adapter * @fl: the Free List * * Tests specified Free List to see whether the number of buffers * available to the hardware has falled below our "starvation" * threshold. */ -static inline bool fl_starving(const struct sge_fl *fl) +static inline bool fl_starving(const struct adapter *adapter, + const struct sge_fl *fl) { - return fl->avail - fl->pend_cred <= FL_STARVE_THRES; + const struct sge *s = &adapter->sge; + + return fl->avail - fl->pend_cred <= s->fl_starve_thres; } /** @@ -457,13 +453,16 @@ static inline void reclaim_completed_tx(struct adapter *adapter, /** * get_buf_size - return the size of an RX Free List buffer. + * @adapter: pointer to the associated adapter * @sdesc: pointer to the software buffer descriptor */ -static inline int get_buf_size(const struct rx_sw_desc *sdesc) +static inline int get_buf_size(const struct adapter *adapter, + const struct rx_sw_desc *sdesc) { - return FL_PG_ORDER > 0 && (sdesc->dma_addr & RX_LARGE_BUF) - ? (PAGE_SIZE << FL_PG_ORDER) - : PAGE_SIZE; + const struct sge *s = &adapter->sge; + + return (s->fl_pg_order > 0 && (sdesc->dma_addr & RX_LARGE_BUF) + ? (PAGE_SIZE << s->fl_pg_order) : PAGE_SIZE); } /** @@ -483,7 +482,8 @@ static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n) if (is_buf_mapped(sdesc)) dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc), - get_buf_size(sdesc), PCI_DMA_FROMDEVICE); + get_buf_size(adapter, sdesc), + PCI_DMA_FROMDEVICE); put_page(sdesc->page); sdesc->page = NULL; if (++fl->cidx == fl->size) @@ -511,7 +511,8 @@ static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl) if (is_buf_mapped(sdesc)) dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc), - get_buf_size(sdesc), PCI_DMA_FROMDEVICE); + get_buf_size(adapter, sdesc), + PCI_DMA_FROMDEVICE); sdesc->page = NULL; if (++fl->cidx == fl->size) fl->cidx = 0; @@ -589,6 +590,7 @@ static inline void poison_buf(struct page *page, size_t sz) static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl, int n, gfp_t gfp) { + struct sge *s = &adapter->sge; struct page *page; dma_addr_t dma_addr; unsigned int cred = fl->avail; @@ -608,12 +610,12 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl, * If we don't support large pages, drop directly into the small page * allocation code. */ - if (FL_PG_ORDER == 0) + if (s->fl_pg_order == 0) goto alloc_small_pages; while (n) { page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, - FL_PG_ORDER); + s->fl_pg_order); if (unlikely(!page)) { /* * We've failed inour attempt to allocate a "large @@ -623,10 +625,10 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl, fl->large_alloc_failed++; break; } - poison_buf(page, PAGE_SIZE << FL_PG_ORDER); + poison_buf(page, PAGE_SIZE << s->fl_pg_order); dma_addr = dma_map_page(adapter->pdev_dev, page, 0, - PAGE_SIZE << FL_PG_ORDER, + PAGE_SIZE << s->fl_pg_order, PCI_DMA_FROMDEVICE); if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) { /* @@ -637,7 +639,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl, * because DMA mapping resources are typically * critical resources once they become scarse. */ - __free_pages(page, FL_PG_ORDER); + __free_pages(page, s->fl_pg_order); goto out; } dma_addr |= RX_LARGE_BUF; @@ -693,7 +695,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl, fl->pend_cred += cred; ring_fl_db(adapter, fl); - if (unlikely(fl_starving(fl))) { + if (unlikely(fl_starving(adapter, fl))) { smp_wmb(); set_bit(fl->cntxt_id, adapter->sge.starving_fl); } @@ -1468,6 +1470,8 @@ static void t4vf_pktgl_free(const struct pkt_gl *gl) static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, const struct cpl_rx_pkt *pkt) { + struct adapter *adapter = rxq->rspq.adapter; + struct sge *s = &adapter->sge; int ret; struct sk_buff *skb; @@ -1478,8 +1482,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, return; } - copy_frags(skb, gl, PKTSHIFT); - skb->len = gl->tot_len - PKTSHIFT; + copy_frags(skb, gl, s->pktshift); + skb->len = gl->tot_len - s->pktshift; skb->data_len = skb->len; skb->truesize += skb->data_len; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1516,6 +1520,8 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, bool csum_ok = pkt->csum_calc && !pkt->err_vec && (rspq->netdev->features & NETIF_F_RXCSUM); struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq); + struct adapter *adapter = rspq->adapter; + struct sge *s = &adapter->sge; /* * If this is a good TCP packet and we have Generic Receive Offload @@ -1537,7 +1543,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, rxq->stats.rx_drops++; return 0; } - __skb_pull(skb, PKTSHIFT); + __skb_pull(skb, s->pktshift); skb->protocol = eth_type_trans(skb, rspq->netdev); skb_record_rx_queue(skb, rspq->idx); rxq->stats.pkts++; @@ -1648,6 +1654,8 @@ static inline void rspq_next(struct sge_rspq *rspq) static int process_responses(struct sge_rspq *rspq, int budget) { struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq); + struct adapter *adapter = rspq->adapter; + struct sge *s = &adapter->sge; int budget_left = budget; while (likely(budget_left)) { @@ -1697,7 +1705,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) BUG_ON(frag >= MAX_SKB_FRAGS); BUG_ON(rxq->fl.avail == 0); sdesc = &rxq->fl.sdesc[rxq->fl.cidx]; - bufsz = get_buf_size(sdesc); + bufsz = get_buf_size(adapter, sdesc); fp->page = sdesc->page; fp->offset = rspq->offset; fp->size = min(bufsz, len); @@ -1726,7 +1734,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) */ ret = rspq->handler(rspq, rspq->cur_desc, &gl); if (likely(ret == 0)) - rspq->offset += ALIGN(fp->size, FL_ALIGN); + rspq->offset += ALIGN(fp->size, s->fl_align); else restore_rx_bufs(&gl, &rxq->fl, frag); } else if (likely(rsp_type == RSP_TYPE_CPL)) { @@ -1963,7 +1971,7 @@ static void sge_rx_timer_cb(unsigned long data) * schedule napi but the FL is no longer starving. * No biggie. */ - if (fl_starving(fl)) { + if (fl_starving(adapter, fl)) { struct sge_eth_rxq *rxq; rxq = container_of(fl, struct sge_eth_rxq, fl); @@ -2047,6 +2055,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, int intr_dest, struct sge_fl *fl, rspq_handler_t hnd) { + struct sge *s = &adapter->sge; struct port_info *pi = netdev_priv(dev); struct fw_iq_cmd cmd, rpl; int ret, iqandst, flsz = 0; @@ -2117,7 +2126,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, fl->size = roundup(fl->size, FL_PER_EQ_UNIT); fl->desc = alloc_ring(adapter->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), - &fl->addr, &fl->sdesc, STAT_LEN); + &fl->addr, &fl->sdesc, s->stat_len); if (!fl->desc) { ret = -ENOMEM; goto err; @@ -2129,7 +2138,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, * free list ring) in Egress Queue Units. */ flsz = (fl->size / FL_PER_EQ_UNIT + - STAT_LEN / EQ_UNIT); + s->stat_len / EQ_UNIT); /* * Fill in all the relevant firmware Ingress Queue Command @@ -2217,6 +2226,7 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *devq, unsigned int iqid) { + struct sge *s = &adapter->sge; int ret, nentries; struct fw_eq_eth_cmd cmd, rpl; struct port_info *pi = netdev_priv(dev); @@ -2225,7 +2235,7 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, * Calculate the size of the hardware TX Queue (including the Status * Page on the end of the TX Queue) in units of TX Descriptors. */ - nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc); + nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); /* * Allocate the hardware ring for the TX ring (with space for its @@ -2234,7 +2244,7 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, txq->q.desc = alloc_ring(adapter->pdev_dev, txq->q.size, sizeof(struct tx_desc), sizeof(struct tx_sw_desc), - &txq->q.phys_addr, &txq->q.sdesc, STAT_LEN); + &txq->q.phys_addr, &txq->q.sdesc, s->stat_len); if (!txq->q.desc) return -ENOMEM; @@ -2307,8 +2317,10 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, */ static void free_txq(struct adapter *adapter, struct sge_txq *tq) { + struct sge *s = &adapter->sge; + dma_free_coherent(adapter->pdev_dev, - tq->size * sizeof(*tq->desc) + STAT_LEN, + tq->size * sizeof(*tq->desc) + s->stat_len, tq->desc, tq->phys_addr); tq->cntxt_id = 0; tq->sdesc = NULL; @@ -2322,6 +2334,7 @@ static void free_txq(struct adapter *adapter, struct sge_txq *tq) static void free_rspq_fl(struct adapter *adapter, struct sge_rspq *rspq, struct sge_fl *fl) { + struct sge *s = &adapter->sge; unsigned int flid = fl ? fl->cntxt_id : 0xffff; t4vf_iq_free(adapter, FW_IQ_TYPE_FL_INT_CAP, @@ -2337,7 +2350,7 @@ static void free_rspq_fl(struct adapter *adapter, struct sge_rspq *rspq, if (fl) { free_rx_bufs(adapter, fl, fl->avail); dma_free_coherent(adapter->pdev_dev, - fl->size * sizeof(*fl->desc) + STAT_LEN, + fl->size * sizeof(*fl->desc) + s->stat_len, fl->desc, fl->addr); kfree(fl->sdesc); fl->sdesc = NULL; @@ -2443,12 +2456,12 @@ int t4vf_sge_init(struct adapter *adapter) * Now translate the adapter parameters into our internal forms. */ if (fl1) - FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT; - STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE_MASK) - ? 128 : 64); - PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control); - FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) + - SGE_INGPADBOUNDARY_SHIFT); + s->fl_pg_order = ilog2(fl1) - PAGE_SHIFT; + s->stat_len = ((sge_params->sge_control & EGRSTATUSPAGESIZE_MASK) + ? 128 : 64); + s->pktshift = PKTSHIFT_GET(sge_params->sge_control); + s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) + + SGE_INGPADBOUNDARY_SHIFT); /* * Set up tasklet timers. From ce8f407a3cc7fc58804b9135e7c8780f0f8c2a8d Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 7 Nov 2014 17:06:30 +0530 Subject: [PATCH 077/138] cxgb4/cxgb4vf: For T5 use Packing and Padding Boundaries for SGE DMA transfers T5 introduces the ability to have separate Packing and Padding Boundaries for SGE DMA transfers from the chip to Host Memory. This change set takes advantage of that to set up a smaller Padding Boundary to conserve PCI Link and Memory Bandwidth with T5. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 30 +++++++++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 51 ++++++++++++++++--- drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 10 ++++ drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 31 ++++++++++- .../ethernet/chelsio/cxgb4vf/t4vf_common.h | 1 + .../net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 23 +++++++++ 6 files changed, 135 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 5e1b314e11af67..39f2b13e66c731 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2914,7 +2914,8 @@ static int t4_sge_init_hard(struct adapter *adap) int t4_sge_init(struct adapter *adap) { struct sge *s = &adap->sge; - u32 sge_control, sge_conm_ctrl; + u32 sge_control, sge_control2, sge_conm_ctrl; + unsigned int ingpadboundary, ingpackboundary; int ret, egress_threshold; /* @@ -2924,8 +2925,31 @@ int t4_sge_init(struct adapter *adap) sge_control = t4_read_reg(adap, SGE_CONTROL); s->pktshift = PKTSHIFT_GET(sge_control); s->stat_len = (sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64; - s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_control) + - X_INGPADBOUNDARY_SHIFT); + + /* T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. T5 introduced the ability to specify these + * separately. The actual Ingress Packet Data alignment boundary + * within Packed Buffer Mode is the maximum of these two + * specifications. + */ + ingpadboundary = 1 << (INGPADBOUNDARY_GET(sge_control) + + X_INGPADBOUNDARY_SHIFT); + if (is_t4(adap->params.chip)) { + s->fl_align = ingpadboundary; + } else { + /* T5 has a different interpretation of one of the PCIe Packing + * Boundary values. + */ + sge_control2 = t4_read_reg(adap, SGE_CONTROL2_A); + ingpackboundary = INGPACKBOUNDARY_G(sge_control2); + if (ingpackboundary == INGPACKBOUNDARY_16B_X) + ingpackboundary = 16; + else + ingpackboundary = 1 << (ingpackboundary + + INGPACKBOUNDARY_SHIFT_X); + + s->fl_align = max(ingpadboundary, ingpackboundary); + } if (adap->flags & USING_SOFT_PARAMS) ret = t4_sge_init_soft(adap); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index a9d9d74e4f092f..163a2a14948cf8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3129,12 +3129,51 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size, HOSTPAGESIZEPF6(sge_hps) | HOSTPAGESIZEPF7(sge_hps)); - t4_set_reg_field(adap, SGE_CONTROL, - INGPADBOUNDARY_MASK | - EGRSTATUSPAGESIZE_MASK, - INGPADBOUNDARY(fl_align_log - 5) | - EGRSTATUSPAGESIZE(stat_len != 64)); - + if (is_t4(adap->params.chip)) { + t4_set_reg_field(adap, SGE_CONTROL, + INGPADBOUNDARY_MASK | + EGRSTATUSPAGESIZE_MASK, + INGPADBOUNDARY(fl_align_log - 5) | + EGRSTATUSPAGESIZE(stat_len != 64)); + } else { + /* T5 introduced the separation of the Free List Padding and + * Packing Boundaries. Thus, we can select a smaller Padding + * Boundary to avoid uselessly chewing up PCIe Link and Memory + * Bandwidth, and use a Packing Boundary which is large enough + * to avoid false sharing between CPUs, etc. + * + * For the PCI Link, the smaller the Padding Boundary the + * better. For the Memory Controller, a smaller Padding + * Boundary is better until we cross under the Memory Line + * Size (the minimum unit of transfer to/from Memory). If we + * have a Padding Boundary which is smaller than the Memory + * Line Size, that'll involve a Read-Modify-Write cycle on the + * Memory Controller which is never good. For T5 the smallest + * Padding Boundary which we can select is 32 bytes which is + * larger than any known Memory Controller Line Size so we'll + * use that. + * + * T5 has a different interpretation of the "0" value for the + * Packing Boundary. This corresponds to 16 bytes instead of + * the expected 32 bytes. We never have a Packing Boundary + * less than 32 bytes so we can't use that special value but + * on the other hand, if we wanted 32 bytes, the best we can + * really do is 64 bytes. + */ + if (fl_align <= 32) { + fl_align = 64; + fl_align_log = 6; + } + t4_set_reg_field(adap, SGE_CONTROL, + INGPADBOUNDARY_MASK | + EGRSTATUSPAGESIZE_MASK, + INGPADBOUNDARY(INGPCIEBOUNDARY_32B_X) | + EGRSTATUSPAGESIZE(stat_len != 64)); + t4_set_reg_field(adap, SGE_CONTROL2_A, + INGPACKBOUNDARY_V(INGPACKBOUNDARY_M), + INGPACKBOUNDARY_V(fl_align_log - + INGPACKBOUNDARY_SHIFT_X)); + } /* * Adjust various SGE Free List Host Buffer Sizes. * diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index a1024db5dc136b..8d2de1006b0845 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -95,6 +95,7 @@ #define X_INGPADBOUNDARY_SHIFT 5 #define SGE_CONTROL 0x1008 +#define SGE_CONTROL2_A 0x1124 #define DCASYSTYPE 0x00080000U #define RXPKTCPLMODE_MASK 0x00040000U #define RXPKTCPLMODE_SHIFT 18 @@ -106,6 +107,7 @@ #define PKTSHIFT_SHIFT 10 #define PKTSHIFT(x) ((x) << PKTSHIFT_SHIFT) #define PKTSHIFT_GET(x) (((x) & PKTSHIFT_MASK) >> PKTSHIFT_SHIFT) +#define INGPCIEBOUNDARY_32B_X 0 #define INGPCIEBOUNDARY_MASK 0x00000380U #define INGPCIEBOUNDARY_SHIFT 7 #define INGPCIEBOUNDARY(x) ((x) << INGPCIEBOUNDARY_SHIFT) @@ -114,6 +116,14 @@ #define INGPADBOUNDARY(x) ((x) << INGPADBOUNDARY_SHIFT) #define INGPADBOUNDARY_GET(x) (((x) & INGPADBOUNDARY_MASK) \ >> INGPADBOUNDARY_SHIFT) +#define INGPACKBOUNDARY_16B_X 0 +#define INGPACKBOUNDARY_SHIFT_X 5 + +#define INGPACKBOUNDARY_S 16 +#define INGPACKBOUNDARY_M 0x7U +#define INGPACKBOUNDARY_V(x) ((x) << INGPACKBOUNDARY_S) +#define INGPACKBOUNDARY_G(x) (((x) >> INGPACKBOUNDARY_S) \ + & INGPACKBOUNDARY_M) #define EGRPCIEBOUNDARY_MASK 0x0000000eU #define EGRPCIEBOUNDARY_SHIFT 1 #define EGRPCIEBOUNDARY(x) ((x) << EGRPCIEBOUNDARY_SHIFT) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index a18830d8aa6d10..cd5b7896cb67a9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2436,6 +2436,7 @@ int t4vf_sge_init(struct adapter *adapter) u32 fl0 = sge_params->sge_fl_buffer_size[0]; u32 fl1 = sge_params->sge_fl_buffer_size[1]; struct sge *s = &adapter->sge; + unsigned int ingpadboundary, ingpackboundary; /* * Start by vetting the basic SGE parameters which have been set up by @@ -2460,8 +2461,34 @@ int t4vf_sge_init(struct adapter *adapter) s->stat_len = ((sge_params->sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64); s->pktshift = PKTSHIFT_GET(sge_params->sge_control); - s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) + - SGE_INGPADBOUNDARY_SHIFT); + + /* T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. T5 introduced the ability to specify these + * separately. The actual Ingress Packet Data alignment boundary + * within Packed Buffer Mode is the maximum of these two + * specifications. (Note that it makes no real practical sense to + * have the Pading Boudary be larger than the Packing Boundary but you + * could set the chip up that way and, in fact, legacy T4 code would + * end doing this because it would initialize the Padding Boundary and + * leave the Packing Boundary initialized to 0 (16 bytes).) + */ + ingpadboundary = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) + + X_INGPADBOUNDARY_SHIFT); + if (is_t4(adapter->params.chip)) { + s->fl_align = ingpadboundary; + } else { + /* T5 has a different interpretation of one of the PCIe Packing + * Boundary values. + */ + ingpackboundary = INGPACKBOUNDARY_G(sge_params->sge_control2); + if (ingpackboundary == INGPACKBOUNDARY_16B_X) + ingpackboundary = 16; + else + ingpackboundary = 1 << (ingpackboundary + + INGPACKBOUNDARY_SHIFT_X); + + s->fl_align = max(ingpadboundary, ingpackboundary); + } /* * Set up tasklet timers. diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 95df61dcb4ce57..b5c301d9565e8f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -134,6 +134,7 @@ struct dev_params { */ struct sge_params { u32 sge_control; /* padding, boundaries, lengths, etc. */ + u32 sge_control2; /* T5: more of the same */ u32 sge_host_page_size; /* RDMA page sizes */ u32 sge_queues_per_page; /* RDMA queues/page */ u32 sge_user_mode_limits; /* limits for BAR2 user mode accesses */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index e984fdc48ba2e3..dc30d285285094 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -468,6 +468,29 @@ int t4vf_get_sge_params(struct adapter *adapter) sge_params->sge_timer_value_2_and_3 = vals[5]; sge_params->sge_timer_value_4_and_5 = vals[6]; + /* T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. T5 introduced the ability to specify these + * separately with the Padding Boundary in SGE_CONTROL and and Packing + * Boundary in SGE_CONTROL2. So for T5 and later we need to grab + * SGE_CONTROL in order to determine how ingress packet data will be + * laid out in Packed Buffer Mode. Unfortunately, older versions of + * the firmware won't let us retrieve SGE_CONTROL2 so if we get a + * failure grabbing it we throw an error since we can't figure out the + * right value. + */ + if (!is_t4(adapter->params.chip)) { + params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + FW_PARAMS_PARAM_XYZ(SGE_CONTROL2_A)); + v = t4vf_query_params(adapter, 1, params, vals); + if (v != FW_SUCCESS) { + dev_err(adapter->pdev_dev, + "Unable to get SGE Control2; " + "probably old firmware.\n"); + return v; + } + sge_params->sge_control2 = vals[0]; + } + params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | FW_PARAMS_PARAM_XYZ(SGE_INGRESS_RX_THRESHOLD)); v = t4vf_query_params(adapter, 1, params, vals); From 50d21a662d6d3155132edf34f72161a59675c02c Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 7 Nov 2014 17:06:31 +0530 Subject: [PATCH 078/138] cxgb4vf: FL Starvation Threshold needs to be larger than the SGE's Egress Congestion Threshold Free List Starvation Threshold needs to be larger than the SGE's Egress Congestion Threshold or we'll end up in a mutual stall where the driver waits for Ingress Packets to drive replacing Free List Pointers and the SGE waits for Free List Pointers before pushing Ingress Packets to the host. Based on original work by Casey Leedom Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 16 ++++++++++------ .../net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 1 + drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 5 ++++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index cd5b7896cb67a9..fdd078d7d82c66 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -93,12 +93,6 @@ enum { TX_QCHECK_PERIOD = (HZ / 2), MAX_TIMER_TX_RECLAIM = 100, - /* - * An FL with <= FL_STARVE_THRES buffers is starving and a periodic - * timer will attempt to refill it. - */ - FL_STARVE_THRES = 4, - /* * Suspend an Ethernet TX queue with fewer available descriptors than * this. We always want to have room for a maximum sized packet: @@ -2490,6 +2484,16 @@ int t4vf_sge_init(struct adapter *adapter) s->fl_align = max(ingpadboundary, ingpackboundary); } + /* A FL with <= fl_starve_thres buffers is starving and a periodic + * timer will attempt to refill it. This needs to be larger than the + * SGE's Egress Congestion Threshold. If it isn't, then we can get + * stuck waiting for new packets while the SGE is waiting for us to + * give it more Free List entries. (Note that the SGE's Egress + * Congestion Threshold is in units of 2 Free List pointers.) + */ + s->fl_starve_thres + = EGRTHRESHOLD_GET(sge_params->sge_congestion_control)*2 + 1; + /* * Set up tasklet timers. */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index b5c301d9565e8f..4b6a6d14d86d98 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -140,6 +140,7 @@ struct sge_params { u32 sge_user_mode_limits; /* limits for BAR2 user mode accesses */ u32 sge_fl_buffer_size[16]; /* free list buffer sizes */ u32 sge_ingress_rx_threshold; /* RX counter interrupt threshold[4] */ + u32 sge_congestion_control; /* congestion thresholds, etc. */ u32 sge_timer_value_0_and_1; /* interrupt coalescing timer values */ u32 sge_timer_value_2_and_3; u32 sge_timer_value_4_and_5; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index dc30d285285094..1e896b923234f6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -493,10 +493,13 @@ int t4vf_get_sge_params(struct adapter *adapter) params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | FW_PARAMS_PARAM_XYZ(SGE_INGRESS_RX_THRESHOLD)); - v = t4vf_query_params(adapter, 1, params, vals); + params[1] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + FW_PARAMS_PARAM_XYZ(SGE_CONM_CTRL)); + v = t4vf_query_params(adapter, 2, params, vals); if (v) return v; sge_params->sge_ingress_rx_threshold = vals[0]; + sge_params->sge_congestion_control = vals[1]; return 0; } From cfdf1e1ba5bf55e095cf4bcaa9585c4759f239e8 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 10 Nov 2014 11:45:13 -0800 Subject: [PATCH 079/138] udptunnel: Add SKB_GSO_UDP_TUNNEL during gro_complete. When doing GRO processing for UDP tunnels, we never add SKB_GSO_UDP_TUNNEL to gso_type - only the type of the inner protocol is added (such as SKB_GSO_TCPV4). The result is that if the packet is later resegmented we will do GSO but not treat it as a tunnel. This results in UDP fragmentation of the outer header instead of (i.e.) TCP segmentation of the inner header as was originally on the wire. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 ++ include/net/udp_tunnel.h | 9 +++++++++ net/ipv4/fou.c | 2 ++ 3 files changed, 13 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ca309820d39e1b..cfb892b265e8b6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -621,6 +621,8 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) int vxlan_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr); int err = -ENOSYS; + udp_tunnel_gro_complete(skb, nhoff); + eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr)); type = eh->h_proto; diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index a47790bcaa3831..2a50a70ef5870c 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -100,6 +100,15 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, return iptunnel_handle_offloads(skb, udp_csum, type); } +static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + nhoff - sizeof(struct udphdr)); + skb_shinfo(skb)->gso_type |= uh->check ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; +} + static inline void udp_tunnel_encap_enable(struct socket *sock) { #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 32e78924e246bb..606c520ffd5af4 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -133,6 +133,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff) int err = -ENOSYS; const struct net_offload **offloads; + udp_tunnel_gro_complete(skb, nhoff); + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); From a815286b94875c0428444e036df7e4e1a070bec0 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Fri, 7 Nov 2014 15:41:21 -0800 Subject: [PATCH 080/138] cxgb4 : Fix bug in DCB app deletion Unlike CEE, IEEE has a bespoke app delete call and does not rely on priority for app deletion Fixes : 2376c879b80c ('cxgb4 : Improve handling of DCB negotiation or loss thereof') Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 6fe300e316c3c4..b6fdb142eacbf7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -79,8 +79,9 @@ static void cxgb4_dcb_cleanup_apps(struct net_device *dev) app.protocol = dcb->app_priority[i].protocolid; if (dcb->dcb_version == FW_PORT_DCB_VER_IEEE) { + app.priority = dcb->app_priority[i].user_prio_map; app.selector = dcb->app_priority[i].sel_field + 1; - err = dcb_ieee_setapp(dev, &app); + err = dcb_ieee_delapp(dev, &app); } else { app.selector = !!(dcb->app_priority[i].sel_field); err = dcb_setapp(dev, &app); From 92697dc9471b8655a2f1203fa351bc37b2d46a26 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Mon, 10 Nov 2014 17:40:02 -0500 Subject: [PATCH 081/138] scsi: Fix more error handling in SCSI_IOCTL_SEND_COMMAND Fix an error path in SCSI_IOCTL_SEND_COMMAND that calls blk_put_request(rq) on an invalid IS_ERR(rq) pointer. Fixes: a492f075450f ("block,scsi: fixup blk_get_request dead queue scenarios") Signed-off-by: Tony Battersby Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 1e053d911240b5..b0c2a616c8f9b8 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -458,7 +458,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto error; + goto error_free_buffer; } blk_rq_set_block_pc(rq); @@ -531,9 +531,11 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } error: + blk_put_request(rq); + +error_free_buffer: kfree(buffer); - if (rq) - blk_put_request(rq); + return err; } EXPORT_SYMBOL_GPL(sg_scsi_ioctl); From 0cdbcd6d3ebeffa7d1a475ed7a294315cd046a11 Mon Sep 17 00:00:00 2001 From: Giedrius Statkevicius Date: Mon, 10 Nov 2014 20:59:42 +0200 Subject: [PATCH 082/138] platform: hp_accel: Add SERIO_I8042 as a dependency since it now includes i8042.h/serio.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make hp_accel dependent on SERIO_I8042 in the Kconfig because since commit a4c724d0723b078e4ab4670e557cda1795036a7a ('platform: hp_accel: add a i8042 filter to remove HPQ6000 data from kb bus stream') hp_accel includes i8042.h and serio.h. Reported-by: Jim Davis Signed-off-by: Giedrius Statkevičius Signed-off-by: Darren Hart --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 4dcfb7116a0487..a2eabe6ff9ada9 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -202,6 +202,7 @@ config TC1100_WMI config HP_ACCEL tristate "HP laptop accelerometer" depends on INPUT && ACPI + depends on SERIO_I8042 select SENSORS_LIS3LV02D select NEW_LEDS select LEDS_CLASS From 1a290581ded60e87276741f8ca97b161d2b226fc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 11 Nov 2014 15:45:57 +0100 Subject: [PATCH 083/138] ALSA: usb-audio: Fix memory leak in FTU quirk M-audio FastTrack Ultra quirk doesn't release the kzalloc'ed memory. This patch adds the private_free callback to release it properly. Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index f119a41ed9a949..7c83bab69deef8 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -885,6 +885,11 @@ static int snd_ftu_eff_switch_put(struct snd_kcontrol *kctl, return changed; } +static void kctl_private_value_free(struct snd_kcontrol *kctl) +{ + kfree((void *)kctl->private_value); +} + static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer, int validx, int bUnitID) { @@ -919,6 +924,7 @@ static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer, return -ENOMEM; } + kctl->private_free = kctl_private_value_free; err = snd_ctl_add(mixer->chip->card, kctl); if (err < 0) return err; From f4a1edd56120249198073aa4a373b77e3700ac8f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 9 Nov 2014 14:25:39 +0200 Subject: [PATCH 084/138] net/mlx4_en: Advertize encapsulation offloads features only when VXLAN tunnel is set Currenly we only support Large-Send and TX checksum offloads for encapsulated traffic of type VXLAN. We must make sure to advertize these offloads up to the stack only when VXLAN tunnel is set. Failing to do so, would mislead the the networking stack to assume that the driver can offload the internal TX checksum for GRE packets and other buggy schemes. Reported-by: Florian Westphal Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f3032fec8fce03..02266e3de514f2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2281,8 +2281,16 @@ static void mlx4_en_add_vxlan_offloads(struct work_struct *work) ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 1); out: - if (ret) + if (ret) { en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret); + return; + } + + /* set offloads */ + priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; + priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2290,6 +2298,11 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) int ret; struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, vxlan_del_task); + /* unset offloads */ + priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); + priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; + priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL; ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -2568,13 +2581,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0) dev->priv_flags |= IFF_UNICAST_FLT; - if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { - dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | - NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; - dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - dev->features |= NETIF_F_GSO_UDP_TUNNEL; - } - mdev->pndev[port] = dev; netif_carrier_off(dev); From 5748eb8f8e989a9da1ac7c96dc73d68cbdedf7df Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 10 Nov 2014 11:50:21 +0100 Subject: [PATCH 085/138] net: ppp: Don't call bpf_prog_create() in ppp_lock In ppp_ioctl(), bpf_prog_create() is called inside ppp_lock, which eventually calls vmalloc() and hits BUG_ON() in vmalloc.c. This patch works around the problem by moving the allocation outside the lock. The bug was revealed by the recent change in net/core/filter.c, as it allocates via vmalloc() instead of kmalloc() now. Reported-and-tested-by: Stefan Seyfried Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 40 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 68c3a3f4e0abe5..794a4732936883 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -755,23 +755,23 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = get_filter(argp, &code); if (err >= 0) { + struct bpf_prog *pass_filter = NULL; struct sock_fprog_kern fprog = { .len = err, .filter = code, }; - ppp_lock(ppp); - if (ppp->pass_filter) { - bpf_prog_destroy(ppp->pass_filter); - ppp->pass_filter = NULL; + err = 0; + if (fprog.filter) + err = bpf_prog_create(&pass_filter, &fprog); + if (!err) { + ppp_lock(ppp); + if (ppp->pass_filter) + bpf_prog_destroy(ppp->pass_filter); + ppp->pass_filter = pass_filter; + ppp_unlock(ppp); } - if (fprog.filter != NULL) - err = bpf_prog_create(&ppp->pass_filter, - &fprog); - else - err = 0; kfree(code); - ppp_unlock(ppp); } break; } @@ -781,23 +781,23 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = get_filter(argp, &code); if (err >= 0) { + struct bpf_prog *active_filter = NULL; struct sock_fprog_kern fprog = { .len = err, .filter = code, }; - ppp_lock(ppp); - if (ppp->active_filter) { - bpf_prog_destroy(ppp->active_filter); - ppp->active_filter = NULL; + err = 0; + if (fprog.filter) + err = bpf_prog_create(&active_filter, &fprog); + if (!err) { + ppp_lock(ppp); + if (ppp->active_filter) + bpf_prog_destroy(ppp->active_filter); + ppp->active_filter = active_filter; + ppp_unlock(ppp); } - if (fprog.filter != NULL) - err = bpf_prog_create(&ppp->active_filter, - &fprog); - else - err = 0; kfree(code); - ppp_unlock(ppp); } break; } From e40607cbe270a9e8360907cb1e62ddf0736e4864 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 Nov 2014 17:54:26 +0100 Subject: [PATCH 086/138] net: sctp: fix NULL pointer dereference in af->from_addr_param on malformed packet An SCTP server doing ASCONF will panic on malformed INIT ping-of-death in the form of: ------------ INIT[PARAM: SET_PRIMARY_IP] ------------> While the INIT chunk parameter verification dissects through many things in order to detect malformed input, it misses to actually check parameters inside of parameters. E.g. RFC5061, section 4.2.4 proposes a 'set primary IP address' parameter in ASCONF, which has as a subparameter an address parameter. So an attacker may send a parameter type other than SCTP_PARAM_IPV4_ADDRESS or SCTP_PARAM_IPV6_ADDRESS, param_type2af() will subsequently return 0 and thus sctp_get_af_specific() returns NULL, too, which we then happily dereference unconditionally through af->from_addr_param(). The trace for the log: BUG: unable to handle kernel NULL pointer dereference at 0000000000000078 IP: [] sctp_process_init+0x492/0x990 [sctp] PGD 0 Oops: 0000 [#1] SMP [...] Pid: 0, comm: swapper Not tainted 2.6.32-504.el6.x86_64 #1 Bochs Bochs RIP: 0010:[] [] sctp_process_init+0x492/0x990 [sctp] [...] Call Trace: [] ? sctp_bind_addr_copy+0x5d/0xe0 [sctp] [] sctp_sf_do_5_1B_init+0x21b/0x340 [sctp] [] sctp_do_sm+0x71/0x1210 [sctp] [] ? sctp_endpoint_lookup_assoc+0xc9/0xf0 [sctp] [] sctp_endpoint_bh_rcv+0x116/0x230 [sctp] [] sctp_inq_push+0x56/0x80 [sctp] [] sctp_rcv+0x982/0xa10 [sctp] [] ? ipt_local_in_hook+0x23/0x28 [iptable_filter] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [...] A minimal way to address this is to check for NULL as we do on all other such occasions where we know sctp_get_af_specific() could possibly return with NULL. Fixes: d6de3097592b ("[SCTP]: Add the handling of "Set Primary IP Address" parameter to INIT") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ab734be8cb2098..9f32741abb1c7b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2609,6 +2609,9 @@ static int sctp_process_param(struct sctp_association *asoc, addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); + if (af == NULL) + break; + af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); From 4184b2a79a7612a9272ce20d639934584a1f3786 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 Nov 2014 18:00:09 +0100 Subject: [PATCH 087/138] net: sctp: fix memory leak in auth key management A very minimal and simple user space application allocating an SCTP socket, setting SCTP_AUTH_KEY setsockopt(2) on it and then closing the socket again will leak the memory containing the authentication key from user space: unreferenced object 0xffff8800837047c0 (size 16): comm "a.out", pid 2789, jiffies 4296954322 (age 192.258s) hex dump (first 16 bytes): 01 00 00 00 04 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __kmalloc+0xe8/0x270 [] sctp_auth_create_key+0x23/0x50 [sctp] [] sctp_auth_set_key+0xa1/0x140 [sctp] [] sctp_setsockopt+0xd03/0x1180 [sctp] [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xd0 [] system_call_fastpath+0x12/0x17 [] 0xffffffffffffffff This is bad because of two things, we can bring down a machine from user space when auth_enable=1, but also we would leave security sensitive keying material in memory without clearing it after use. The issue is that sctp_auth_create_key() already sets the refcount to 1, but after allocation sctp_auth_set_key() does an additional refcount on it, and thus leaving it around when we free the socket. Fixes: 65b07e5d0d0 ("[SCTP]: API updates to suport SCTP-AUTH extensions.") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/auth.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 0e8529113dc5a0..fb7976aee61c84 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -862,8 +862,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, list_add(&cur_key->key_list, sh_keys); cur_key->key = key; - sctp_auth_key_hold(key); - return 0; nomem: if (!replace) From 5337b5b75cd9bd3624a6820e3c2a084d2480061c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Nov 2014 17:54:25 -0800 Subject: [PATCH 088/138] ipv6: fix IPV6_PKTINFO with v4 mapped Use IS_ENABLED(CONFIG_IPV6), to enable this code if IPv6 is a module. Signed-off-by: Eric Dumazet Fixes: c8e6ad0829a7 ("ipv6: honor IPV6_PKTINFO with v4 mapped addresses on sendmsg") Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c373a9ad455568..9daf2177dc005c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -195,7 +195,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; -#if defined(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (allow_ipv6 && cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) { From 79ce0477ffe82e7e49e55179cd176a1c33382744 Mon Sep 17 00:00:00 2001 From: Brian Hill Date: Tue, 11 Nov 2014 13:39:39 -0700 Subject: [PATCH 089/138] net: phy: Correctly handle MII ioctl which changes autonegotiation. When advertised capabilities are changed with mii-tool, such as: mii-tool -A 10baseT the existing handler has two errors. - An actual PHY register value is provided by mii-tool, and this must be mapped to internal state with mii_adv_to_ethtool_adv_t(). - The PHY state machine needs to be told that autonegotiation has again been performed. If not, the MAC will not be notified of the new link speed and duplex, resulting in a possible config mismatch. Signed-off-by: Brian Hill Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1dfffdc9dfc357..767cd110f49688 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -352,6 +352,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *mii_data = if_mii(ifr); u16 val = mii_data->val_in; + bool change_autoneg = false; switch (cmd) { case SIOCGMIIPHY: @@ -367,22 +368,29 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) if (mii_data->phy_id == phydev->addr) { switch (mii_data->reg_num) { case MII_BMCR: - if ((val & (BMCR_RESET | BMCR_ANENABLE)) == 0) + if ((val & (BMCR_RESET | BMCR_ANENABLE)) == 0) { + if (phydev->autoneg == AUTONEG_ENABLE) + change_autoneg = true; phydev->autoneg = AUTONEG_DISABLE; - else + if (val & BMCR_FULLDPLX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + if (val & BMCR_SPEED1000) + phydev->speed = SPEED_1000; + else if (val & BMCR_SPEED100) + phydev->speed = SPEED_100; + else phydev->speed = SPEED_10; + } + else { + if (phydev->autoneg == AUTONEG_DISABLE) + change_autoneg = true; phydev->autoneg = AUTONEG_ENABLE; - if (!phydev->autoneg && (val & BMCR_FULLDPLX)) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - if (!phydev->autoneg && (val & BMCR_SPEED1000)) - phydev->speed = SPEED_1000; - else if (!phydev->autoneg && - (val & BMCR_SPEED100)) - phydev->speed = SPEED_100; + } break; case MII_ADVERTISE: - phydev->advertising = val; + phydev->advertising = mii_adv_to_ethtool_adv_t(val); + change_autoneg = true; break; default: /* do nothing */ @@ -396,6 +404,10 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) if (mii_data->reg_num == MII_BMCR && val & BMCR_RESET) return phy_init_hw(phydev); + + if (change_autoneg) + return phy_start_aneg(phydev); + return 0; case SIOCSHWTSTAMP: From 67732cd34382066ae5df313b6dad65ab14b9735f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 11 Nov 2014 11:07:08 +0100 Subject: [PATCH 090/138] PM / Domains: Fix initial default state of the need_restore flag The initial state of the device's need_restore flag should'nt depend on the current state of the PM domain. For example it should be perfectly valid to attach an inactive device to a powered PM domain. The pm_genpd_dev_need_restore() API allow us to update the need_restore flag to somewhat cope with such scenarios. Typically that should have been done from drivers/buses ->probe() since it's those that put the requirements on the value of the need_restore flag. Until recently, the Exynos SOCs were the only user of the pm_genpd_dev_need_restore() API, though invoking it from a centralized location while adding devices to their PM domains. Due to that Exynos now have swithed to the generic OF-based PM domain look-up, it's no longer possible to invoke the API from a centralized location. The reason is because devices are now added to their PM domains during the probe sequence. Commit "ARM: exynos: Move to generic PM domain DT bindings" did the switch for Exynos to the generic OF-based PM domain look-up, but it also removed the call to pm_genpd_dev_need_restore(). This caused a regression for some of the Exynos drivers. To handle things more properly in the generic PM domain, let's change the default initial value of the need_restore flag to reflect that the state is unknown. As soon as some of the runtime PM callbacks gets invoked, update the initial value accordingly. Moreover, since the generic PM domain is verifying that all devices are both runtime PM enabled and suspended, using pm_runtime_suspended() while pm_genpd_poweroff() is invoked from the scheduled work, we can be sure of that the PM domain won't be powering off while having active devices. Do note that, the generic PM domain can still only know about active devices which has been activated through invoking its runtime PM resume callback. In other words, buses/drivers using pm_runtime_set_active() during ->probe() will still suffer from a race condition, potentially probing a device without having its PM domain being powered. That issue will have to be solved using a different approach. This a log from the boot regression for Exynos5, which is being fixed in this patch. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 308 at ../drivers/clk/clk.c:851 clk_disable+0x24/0x30() Modules linked in: CPU: 0 PID: 308 Comm: kworker/0:1 Not tainted 3.18.0-rc3-00569-gbd9449f-dirty #10 Workqueue: pm pm_runtime_work [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x70/0xbc) [] (dump_stack) from [] (warn_slowpath_common+0x64/0x88) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (clk_disable+0x24/0x30) [] (clk_disable) from [] (gsc_runtime_suspend+0x128/0x160) [] (gsc_runtime_suspend) from [] (pm_generic_runtime_suspend+0x2c/0x38) [] (pm_generic_runtime_suspend) from [] (pm_genpd_default_save_state+0x2c/0x8c) [] (pm_genpd_default_save_state) from [] (pm_genpd_poweroff+0x224/0x3ec) [] (pm_genpd_poweroff) from [] (pm_genpd_runtime_suspend+0x9c/0xcc) [] (pm_genpd_runtime_suspend) from [] (__rpm_callback+0x2c/0x60) [] (__rpm_callback) from [] (rpm_callback+0x20/0x74) [] (rpm_callback) from [] (rpm_suspend+0xd4/0x43c) [] (rpm_suspend) from [] (pm_runtime_work+0x80/0x90) [] (pm_runtime_work) from [] (process_one_work+0x12c/0x314) [] (process_one_work) from [] (worker_thread+0x3c/0x4b0) [] (worker_thread) from [] (kthread+0xcc/0xe8) [] (kthread) from [] (ret_from_fork+0x14/0x3c) ---[ end trace 40cd58bcd6988f12 ]--- Fixes: a4a8c2c4962bb655 (ARM: exynos: Move to generic PM domain DT bindings) Reported-and-tested0by: Sylwester Nawrocki Reviewed-by: Sylwester Nawrocki Reviewed-by: Kevin Hilman Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 38 +++++++++++++++++++++++++++++++------ include/linux/pm_domain.h | 2 +- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index b520687046d4f3..fb83d4acd400ef 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -361,9 +361,19 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, struct device *dev = pdd->dev; int ret = 0; - if (gpd_data->need_restore) + if (gpd_data->need_restore > 0) return 0; + /* + * If the value of the need_restore flag is still unknown at this point, + * we trust that pm_genpd_poweroff() has verified that the device is + * already runtime PM suspended. + */ + if (gpd_data->need_restore < 0) { + gpd_data->need_restore = 1; + return 0; + } + mutex_unlock(&genpd->lock); genpd_start_dev(genpd, dev); @@ -373,7 +383,7 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, mutex_lock(&genpd->lock); if (!ret) - gpd_data->need_restore = true; + gpd_data->need_restore = 1; return ret; } @@ -389,12 +399,17 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, { struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; - bool need_restore = gpd_data->need_restore; + int need_restore = gpd_data->need_restore; - gpd_data->need_restore = false; + gpd_data->need_restore = 0; mutex_unlock(&genpd->lock); genpd_start_dev(genpd, dev); + + /* + * Call genpd_restore_dev() for recently added devices too (need_restore + * is negative then). + */ if (need_restore) genpd_restore_dev(genpd, dev); @@ -603,6 +618,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; + struct generic_pm_domain_data *gpd_data; bool (*stop_ok)(struct device *__dev); int ret; @@ -628,6 +644,16 @@ static int pm_genpd_runtime_suspend(struct device *dev) return 0; mutex_lock(&genpd->lock); + + /* + * If we have an unknown state of the need_restore flag, it means none + * of the runtime PM callbacks has been invoked yet. Let's update the + * flag to reflect that the current state is active. + */ + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); + if (gpd_data->need_restore < 0) + gpd_data->need_restore = 0; + genpd->in_progress++; pm_genpd_poweroff(genpd); genpd->in_progress--; @@ -1442,7 +1468,7 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, mutex_lock(&gpd_data->lock); gpd_data->base.dev = dev; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); - gpd_data->need_restore = genpd->status == GPD_STATE_POWER_OFF; + gpd_data->need_restore = -1; gpd_data->td.constraint_changed = true; gpd_data->td.effective_constraint_ns = -1; mutex_unlock(&gpd_data->lock); @@ -1546,7 +1572,7 @@ void pm_genpd_dev_need_restore(struct device *dev, bool val) psd = dev_to_psd(dev); if (psd && psd->domain_data) - to_gpd_data(psd->domain_data)->need_restore = val; + to_gpd_data(psd->domain_data)->need_restore = val ? 1 : 0; spin_unlock_irqrestore(&dev->power.lock, flags); } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b3ed7766a29196..2e0e06daf8c069 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -106,7 +106,7 @@ struct generic_pm_domain_data { struct notifier_block nb; struct mutex lock; unsigned int refcount; - bool need_restore; + int need_restore; }; #ifdef CONFIG_PM_GENERIC_DOMAINS From 48eb5b9c3dd2768b6a4de9c1eab606820fd84192 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Nov 2014 10:22:05 -0800 Subject: [PATCH 091/138] ixgbe: phy: fix uninitialized status in ixgbe_setup_phy_link_tnx Status variable is never initialized, can carry an arbitrary value on the stack and thus may let the function fail. Fixes: e90dd2645664 ("ixgbe: Make return values more direct") Signed-off-by: Daniel Borkmann Acked-by: Emil Tantilov Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index d47b19f27c355c..28b81ae09b5ae2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -635,7 +635,6 @@ s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed, **/ s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw) { - s32 status; u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; bool autoneg = false; ixgbe_link_speed speed; @@ -700,8 +699,7 @@ s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw) hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_AN, autoneg_reg); - - return status; + return 0; } /** From 93ecd2607f55f99ec0022f53f8224566c8ec797f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 11 Nov 2014 22:38:00 +0000 Subject: [PATCH 092/138] net: qualcomm: Fix dependency This patch removes the dependency of the VENDOR entry and fixes the QCA7000 one. Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index f3a47147937d46..9a49f42ac2ba4a 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -5,7 +5,6 @@ config NET_VENDOR_QUALCOMM bool "Qualcomm devices" default y - depends on SPI_MASTER && OF_GPIO ---help--- If you have a network (Ethernet) card belonging to this class, say Y and read the Ethernet-HOWTO, available from @@ -20,7 +19,7 @@ if NET_VENDOR_QUALCOMM config QCA7000 tristate "Qualcomm Atheros QCA7000 support" - depends on SPI_MASTER && OF_GPIO + depends on SPI_MASTER && OF ---help--- This SPI protocol driver supports the Qualcomm Atheros QCA7000. From c96e731c93ff0c9f53442c11c68e50fd07929d27 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 10 Nov 2014 18:06:20 -0800 Subject: [PATCH 093/138] net: bcmgenet: connect and disconnect from the PHY state machine phy_disconnect() is the only way to guarantee that we are not going to schedule more work on the PHY state machine workqueue for that particular PHY device. This fixes an issue where a network interface was suspended prior to a system suspend/resume cycle and would then be resumed as part of mdio_bus_resume(), since the GENET interface clocks would have been disabled, this basically resulted in bus errors to appear since we are invoking the GENET driver adjust_link() callback. Fixes: b6e978e50444 ("net: bcmgenet: add suspend/resume callbacks") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++++ drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index fdc9ec09e45351..34055fd59c7448 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2140,6 +2140,9 @@ static int bcmgenet_open(struct net_device *dev) goto err_irq0; } + phy_connect_direct(dev, priv->phydev, bcmgenet_mii_setup, + priv->phy_interface); + bcmgenet_netif_start(dev); return 0; @@ -2184,6 +2187,9 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); + /* Really kill the PHY state machine and disconnect from it */ + phy_disconnect(priv->phydev); + /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index dbf524ea3b1956..44b55b8c220fba 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -620,6 +620,7 @@ int bcmgenet_mii_init(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); void bcmgenet_mii_reset(struct net_device *dev); +void bcmgenet_mii_setup(struct net_device *dev); /* Wake-on-LAN routines */ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 9ff799a9f8019d..9c5fee782ccf85 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -77,7 +77,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, /* setup netdev link state when PHY link status change and * update UMAC and RGMII block when link up */ -static void bcmgenet_mii_setup(struct net_device *dev) +void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); struct phy_device *phydev = priv->phydev; From dbd479db79572067a4c031f84c204ba30d0256ef Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 10 Nov 2014 18:06:21 -0800 Subject: [PATCH 094/138] net: bcmgenet: apply MII configuration in bcmgenet_open() In case an interface has been brought down before entering S3, and then brought up out of S3, all the initialization done during bcmgenet_probe() by bcmgenet_mii_init() calling bcmgenet_mii_config() is just lost since register contents are restored to their reset values. Re-apply this configuration anytime we call bcmgenet_open() to make sure our port multiplexer is properly configured to match the PHY interface. Since we are now calling bcmgenet_mii_config() everytime bcmgenet_open() is called, make sure we only print the message during initialization time not to pollute the console. Fixes: b6e978e50444 ("net: bcmgenet: add suspend/resume callbacks") Fixes: 1c1008c793fa4 ("net: bcmgenet: add main driver file") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 5 ++++- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 7 ++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 34055fd59c7448..da1a2500c91ce4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2140,6 +2140,9 @@ static int bcmgenet_open(struct net_device *dev) goto err_irq0; } + /* Re-configure the port multiplexer towards the PHY device */ + bcmgenet_mii_config(priv->dev, false); + phy_connect_direct(dev, priv->phydev, bcmgenet_mii_setup, priv->phy_interface); @@ -2691,7 +2694,7 @@ static int bcmgenet_resume(struct device *d) phy_init_hw(priv->phydev); /* Speed settings must be restored */ - bcmgenet_mii_config(priv->dev); + bcmgenet_mii_config(priv->dev, false); /* disable ethernet MAC while updating its registers */ umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 44b55b8c220fba..31b2da5f9b8213 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -617,7 +617,7 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF); /* MDIO routines */ int bcmgenet_mii_init(struct net_device *dev); -int bcmgenet_mii_config(struct net_device *dev); +int bcmgenet_mii_config(struct net_device *dev, bool init); void bcmgenet_mii_exit(struct net_device *dev); void bcmgenet_mii_reset(struct net_device *dev); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 9c5fee782ccf85..933cd7e7cd3370 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -211,7 +211,7 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL); } -int bcmgenet_mii_config(struct net_device *dev) +int bcmgenet_mii_config(struct net_device *dev, bool init) { struct bcmgenet_priv *priv = netdev_priv(dev); struct phy_device *phydev = priv->phydev; @@ -298,7 +298,8 @@ int bcmgenet_mii_config(struct net_device *dev) return -EINVAL; } - dev_info(kdev, "configuring instance for %s\n", phy_name); + if (init) + dev_info(kdev, "configuring instance for %s\n", phy_name); return 0; } @@ -350,7 +351,7 @@ static int bcmgenet_mii_probe(struct net_device *dev) * PHY speed which is needed for bcmgenet_mii_config() to configure * things appropriately. */ - ret = bcmgenet_mii_config(dev); + ret = bcmgenet_mii_config(dev, true); if (ret) { phy_disconnect(priv->phydev); return ret; From 7f60dcaaf91e911002007c7ae885ff6ef0f36c0d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 12 Nov 2014 00:15:41 +0800 Subject: [PATCH 095/138] block: blk-merge: fix blk_recount_segments() For cloned bio, bio->bi_vcnt can't be used at all, and we have resort to bio_segments() to figure out how many segment there are in the bio. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index b3ac40aef46b31..89b97b5e088185 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -97,19 +97,22 @@ void blk_recalc_rq_segments(struct request *rq) void blk_recount_segments(struct request_queue *q, struct bio *bio) { - bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, - &q->queue_flags); - bool merge_not_need = bio->bi_vcnt < queue_max_segments(q); + unsigned short seg_cnt; + + /* estimate segment number by bi_vcnt for non-cloned bio */ + if (bio_flagged(bio, BIO_CLONED)) + seg_cnt = bio_segments(bio); + else + seg_cnt = bio->bi_vcnt; - if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) && - merge_not_need) - bio->bi_phys_segments = bio->bi_vcnt; + if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && + (seg_cnt < queue_max_segments(q))) + bio->bi_phys_segments = seg_cnt; else { struct bio *nxt = bio->bi_next; bio->bi_next = NULL; - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, - no_sg_merge && merge_not_need); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); bio->bi_next = nxt; } From fbf8e7211ac7858d3df4a4203c18da7a58560784 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 11 Nov 2014 10:04:13 -0700 Subject: [PATCH 096/138] selftests/net: psock_fanout seg faults in sock_fanout_read_ring() The while loop in sock_fanout_read_ring() checks mmap region bounds after access, causing it to segfault. Fix it to check count before accessing header->tp_status. This problem can be reproduced consistently when the test in run as follows: make -C tools/testing/selftests TARGETS=net run_tests or make run_tests from tools/testing/selftests or make run_test from tools/testing/selftests/net Signed-off-by: Shuah Khan Signed-off-by: David S. Miller --- tools/testing/selftests/net/psock_fanout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 57b9c2b7c4ffbf..6f6733331d9597 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -128,7 +128,7 @@ static int sock_fanout_read_ring(int fd, void *ring) struct tpacket2_hdr *header = ring; int count = 0; - while (header->tp_status & TP_STATUS_USER && count < RING_NUM_FRAMES) { + while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) { count++; header = ring + (count * getpagesize()); } From 394c97f824fa6b62351fe08b722fff2fc5188bfc Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 12 Nov 2014 17:38:08 +0800 Subject: [PATCH 097/138] ALSA: hda/realtek - Change EAPD to verb control This will fix no sound in Linux system after reboot from windows. Change log: - alc662_fill_coef() is replaced with alc_fill_eapd_coef_idx() and move into alc_auto_init_amp(). - For ALC262, ALC267, ALC268, ALC269, ALC233, ALC255, ALC280, ALC282, ALC283, ALC284, ALC285, ALC286, ALC288, ALC290, ALC292, ALC293, ALC294, ALC668, ALC888VC, ALC888VD, ALC891, ALC892, ALC898 and ALC1150, add update COEF control for EAPD setting. - Remove alc269_fill_coef() for update EAPD control line. ADDITIONAL NOTE: Many Realtek cdoecs have a COEF bit to switch the master amp control between COEF and EAPD. Windows drivers seem using COEF while we use EAPD, which is more standard. As a result, some system suffer from the silent output when booting after Windows. This patch sets the COEF bits on the relevant codecs properly to switch to EAPD control. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=87771 Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 105 ++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 30 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index da03693099eb55..172395465e8a63 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -288,6 +288,80 @@ static void alc880_unsol_event(struct hda_codec *codec, unsigned int res) snd_hda_jack_unsol_event(codec, res >> 2); } +/* Change EAPD to verb control */ +static void alc_fill_eapd_coef(struct hda_codec *codec) +{ + int coef; + + coef = alc_get_coef0(codec); + + switch (codec->vendor_id) { + case 0x10ec0262: + alc_update_coef_idx(codec, 0x7, 0, 1<<5); + break; + case 0x10ec0267: + case 0x10ec0268: + alc_update_coef_idx(codec, 0x7, 0, 1<<13); + break; + case 0x10ec0269: + if ((coef & 0x00f0) == 0x0010) + alc_update_coef_idx(codec, 0xd, 0, 1<<14); + if ((coef & 0x00f0) == 0x0020) + alc_update_coef_idx(codec, 0x4, 1<<15, 0); + if ((coef & 0x00f0) == 0x0030) + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + break; + case 0x10ec0280: + case 0x10ec0284: + case 0x10ec0290: + case 0x10ec0292: + alc_update_coef_idx(codec, 0x4, 1<<15, 0); + break; + case 0x10ec0233: + case 0x10ec0255: + case 0x10ec0282: + case 0x10ec0283: + case 0x10ec0286: + case 0x10ec0288: + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + break; + case 0x10ec0285: + case 0x10ec0293: + alc_update_coef_idx(codec, 0xa, 1<<13, 0); + break; + case 0x10ec0662: + if ((coef & 0x00f0) == 0x0030) + alc_update_coef_idx(codec, 0x4, 1<<10, 0); /* EAPD Ctrl */ + break; + case 0x10ec0272: + case 0x10ec0273: + case 0x10ec0663: + case 0x10ec0665: + case 0x10ec0670: + case 0x10ec0671: + case 0x10ec0672: + alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */ + break; + case 0x10ec0668: + alc_update_coef_idx(codec, 0x7, 3<<13, 0); + break; + case 0x10ec0867: + alc_update_coef_idx(codec, 0x4, 1<<10, 0); + break; + case 0x10ec0888: + if ((coef & 0x00f0) == 0x0020 || (coef & 0x00f0) == 0x0030) + alc_update_coef_idx(codec, 0x7, 1<<5, 0); + break; + case 0x10ec0892: + alc_update_coef_idx(codec, 0x7, 1<<5, 0); + break; + case 0x10ec0899: + case 0x10ec0900: + alc_update_coef_idx(codec, 0x7, 1<<1, 0); + break; + } +} + /* additional initialization for ALC888 variants */ static void alc888_coef_init(struct hda_codec *codec) { @@ -339,6 +413,7 @@ static void alc_eapd_shutup(struct hda_codec *codec) /* generic EAPD initialization */ static void alc_auto_init_amp(struct hda_codec *codec, int type) { + alc_fill_eapd_coef(codec); alc_auto_setup_eapd(codec, true); switch (type) { case ALC_INIT_GPIO1: @@ -5212,9 +5287,6 @@ static void alc269_fill_coef(struct hda_codec *codec) } } - /* Class D */ - alc_update_coef_idx(codec, 0xd, 0, 1<<14); - /* HP */ alc_update_coef_idx(codec, 0x4, 0, 1<<11); } @@ -6124,29 +6196,6 @@ static const struct snd_hda_pin_quirk alc662_pin_fixup_tbl[] = { {} }; -static void alc662_fill_coef(struct hda_codec *codec) -{ - int coef; - - coef = alc_get_coef0(codec); - - switch (codec->vendor_id) { - case 0x10ec0662: - if ((coef & 0x00f0) == 0x0030) - alc_update_coef_idx(codec, 0x4, 1<<10, 0); /* EAPD Ctrl */ - break; - case 0x10ec0272: - case 0x10ec0273: - case 0x10ec0663: - case 0x10ec0665: - case 0x10ec0670: - case 0x10ec0671: - case 0x10ec0672: - alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */ - break; - } -} - /* */ static int patch_alc662(struct hda_codec *codec) @@ -6169,10 +6218,6 @@ static int patch_alc662(struct hda_codec *codec) case 0x10ec0668: spec->init_hook = alc668_restore_default_value; break; - default: - spec->init_hook = alc662_fill_coef; - alc662_fill_coef(codec); - break; } snd_hda_pick_fixup(codec, alc662_fixup_models, From ee7bc3cdc2702ba36930d477b76dacd5b18e8956 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Tue, 11 Nov 2014 23:30:51 -0800 Subject: [PATCH 098/138] cxgb4 : dcb open-lldp interop fixes * In LLD_MANAGED mode, traffic classes were being returned in reverse order to lldp agent. * Priotype of strict is no longer the default returned. * Change behaviour of getdcbx() based on discussions on lldp-devel These were missed as there was no working fetch interface for open-lldp when running in LLD_MANAGED mode till now. Fixes: 76bcb31efc06 ("cxgb4 : Add DCBx support codebase and dcbnl_ops") Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index b6fdb142eacbf7..cca60499400373 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -123,7 +123,11 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, case CXGB4_DCB_INPUT_FW_ENABLED: { /* we're going to use Firmware DCB */ dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE; - dcb->supported = CXGB4_DCBX_FW_SUPPORT; + dcb->supported = DCB_CAP_DCBX_LLD_MANAGED; + if (dcb->dcb_version == FW_PORT_DCB_VER_IEEE) + dcb->supported |= DCB_CAP_DCBX_VER_IEEE; + else + dcb->supported |= DCB_CAP_DCBX_VER_CEE; break; } @@ -437,14 +441,17 @@ static void cxgb4_getpgtccfg(struct net_device *dev, int tc, *up_tc_map = (1 << tc); /* prio_type is link strict */ - *prio_type = 0x2; + if (*pgid != 0xF) + *prio_type = 0x2; } static void cxgb4_getpgtccfg_tx(struct net_device *dev, int tc, u8 *prio_type, u8 *pgid, u8 *bw_per, u8 *up_tc_map) { - return cxgb4_getpgtccfg(dev, tc, prio_type, pgid, bw_per, up_tc_map, 1); + /* tc 0 is written at MSB position */ + return cxgb4_getpgtccfg(dev, (7 - tc), prio_type, pgid, bw_per, + up_tc_map, 1); } @@ -452,7 +459,9 @@ static void cxgb4_getpgtccfg_rx(struct net_device *dev, int tc, u8 *prio_type, u8 *pgid, u8 *bw_per, u8 *up_tc_map) { - return cxgb4_getpgtccfg(dev, tc, prio_type, pgid, bw_per, up_tc_map, 0); + /* tc 0 is written at MSB position */ + return cxgb4_getpgtccfg(dev, (7 - tc), prio_type, pgid, bw_per, + up_tc_map, 0); } static void cxgb4_setpgtccfg_tx(struct net_device *dev, int tc, @@ -462,6 +471,7 @@ static void cxgb4_setpgtccfg_tx(struct net_device *dev, int tc, struct fw_port_cmd pcmd; struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = pi->adapter; + int fw_tc = 7 - tc; u32 _pgid; int err; @@ -480,8 +490,8 @@ static void cxgb4_setpgtccfg_tx(struct net_device *dev, int tc, } _pgid = be32_to_cpu(pcmd.u.dcb.pgid.pgid); - _pgid &= ~(0xF << (tc * 4)); - _pgid |= pgid << (tc * 4); + _pgid &= ~(0xF << (fw_tc * 4)); + _pgid |= pgid << (fw_tc * 4); pcmd.u.dcb.pgid.pgid = cpu_to_be32(_pgid); INIT_PORT_DCB_WRITE_CMD(pcmd, pi->port_id); @@ -594,7 +604,7 @@ static void cxgb4_getpfccfg(struct net_device *dev, int priority, u8 *pfccfg) priority >= CXGB4_MAX_PRIORITY) *pfccfg = 0; else - *pfccfg = (pi->dcb.pfcen >> priority) & 1; + *pfccfg = (pi->dcb.pfcen >> (7 - priority)) & 1; } /* Enable/disable Priority Pause Frames for the specified Traffic Class @@ -619,9 +629,9 @@ static void cxgb4_setpfccfg(struct net_device *dev, int priority, u8 pfccfg) pcmd.u.dcb.pfc.pfcen = pi->dcb.pfcen; if (pfccfg) - pcmd.u.dcb.pfc.pfcen |= (1 << priority); + pcmd.u.dcb.pfc.pfcen |= (1 << (7 - priority)); else - pcmd.u.dcb.pfc.pfcen &= (~(1 << priority)); + pcmd.u.dcb.pfc.pfcen &= (~(1 << (7 - priority))); err = t4_wr_mbox(adap, adap->mbox, &pcmd, sizeof(pcmd), &pcmd); if (err != FW_PORT_DCB_CFG_SUCCESS) { From cca04b2854ecfb7cd1b8ee84ab38bc99af59f526 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 12 Nov 2014 11:33:52 +0100 Subject: [PATCH 099/138] net: ptp: fix time stamp matching logic for VLAN packets. Commit ae5c6c6d "ptp: Classify ptp over ip over vlan packets" changed the code in two drivers that matches time stamps with PTP frames, with the goal of allowing VLAN tagged PTP packets to receive hardware time stamps. However, that commit failed to account for the VLAN header when parsing IPv4 packets. This patch fixes those two drivers to correctly match VLAN tagged IPv4/UDP PTP messages with their time stamps. This patch should also be applied to v3.17. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpts.c | 2 +- drivers/net/phy/dp83640.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index ab92f67da035f2..4a4388b813ac6b 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -264,7 +264,7 @@ static int cpts_match(struct sk_buff *skb, unsigned int ptp_class, switch (ptp_class & PTP_CLASS_PMASK) { case PTP_CLASS_IPV4: - offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; break; case PTP_CLASS_IPV6: offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 2954052706e8bc..e22e602beef342 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -791,7 +791,7 @@ static int match(struct sk_buff *skb, unsigned int type, struct rxts *rxts) switch (type & PTP_CLASS_PMASK) { case PTP_CLASS_IPV4: - offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; break; case PTP_CLASS_IPV6: offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; @@ -934,7 +934,7 @@ static int is_sync(struct sk_buff *skb, int type) switch (type & PTP_CLASS_PMASK) { case PTP_CLASS_IPV4: - offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; break; case PTP_CLASS_IPV6: offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; From 6251edd932ce3faadbfe27b0a0fe79780e0972e9 Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Thu, 13 Nov 2014 04:24:10 +0900 Subject: [PATCH 100/138] netlink: Properly unbind in error conditions. Even if netlink_kernel_cfg::unbind is implemented the unbind() method is not called, because cfg->unbind is omitted in __netlink_kernel_create(). And fix wrong argument of test_bit() and off by one problem. At this point, no unbind() method is implemented, so there is no real issue. Fixes: 4f520900522f ("netlink: have netlink per-protocol bind function return an error code.") Signed-off-by: Hiroaki SHIMODA Cc: Richard Guy Briggs Acked-by: Richard Guy Briggs Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f1de72de273e20..0007b818039708 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1440,7 +1440,7 @@ static void netlink_unbind(int group, long unsigned int groups, return; for (undo = 0; undo < group; undo++) - if (test_bit(group, &groups)) + if (test_bit(undo, &groups)) nlk->netlink_unbind(undo); } @@ -1492,7 +1492,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups - 1, groups, nlk); + netlink_unbind(nlk->ngroups, groups, nlk); return err; } } @@ -2509,6 +2509,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; + nl_table[unit].unbind = cfg->unbind; nl_table[unit].flags = cfg->flags; if (cfg->compare) nl_table[unit].compare = cfg->compare; From 65eca3a20264a8999570c269406196bd1ae23be7 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 20 Oct 2014 15:58:49 +0200 Subject: [PATCH 101/138] virtio_console: move early VQ enablement Commit f5866db6 (virtio_console: enable VQs early) tried to make sure that DRIVER_OK was set when virtio_console started using its virtqueues. Doing this in add_port(), however, means that we try to set DRIVER_OK again when when a port is dynamically added after the probe function is done. Let's move virtio_device_ready() to the probe function just before trying to use the virtqueues instead. This is fine as nothing can fail inbetween. Reported-by: Thomas Graf Reviewed-by: Michael S. Tsirkin Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index bfa640023e6489..cf7a561fad7cd9 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1449,8 +1449,6 @@ static int add_port(struct ports_device *portdev, u32 id) spin_lock_init(&port->outvq_lock); init_waitqueue_head(&port->waitqueue); - virtio_device_ready(portdev->vdev); - /* Fill the in_vq with buffers so the host can send us data. */ nr_added_bufs = fill_queue(port->in_vq, &port->inbuf_lock); if (!nr_added_bufs) { @@ -2026,6 +2024,8 @@ static int virtcons_probe(struct virtio_device *vdev) spin_lock_init(&portdev->ports_lock); INIT_LIST_HEAD(&portdev->ports); + virtio_device_ready(portdev->vdev); + if (multiport) { unsigned int nr_added_bufs; From 3542aed7480925eb859f7ce101982209cc19a126 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 12 Nov 2014 08:11:56 +0100 Subject: [PATCH 102/138] ALSA: hda - Add mute LED control for Lenovo Ideapad Z560 Lenovo Ideapad Z560 has a mute LED that is controlled via EAPD pin 0x1b on CX20585 codec. (EAPD bit on corresponds to mute LED on.) The machine doesn't need other EAPD, so the fixup concentrates on controlling EAPD 0x1b following the vmaster state (but inversely). Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=665315 Reported-by: Szymon Kowalczyk Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 71e4bad06345c8..e9ebc7bd752cae 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -43,6 +43,7 @@ struct conexant_spec { unsigned int num_eapds; hda_nid_t eapds[4]; bool dynamic_eapd; + hda_nid_t mute_led_eapd; unsigned int parse_flags; /* flag for snd_hda_parse_pin_defcfg() */ @@ -163,6 +164,17 @@ static void cx_auto_vmaster_hook(void *private_data, int enabled) cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, enabled); } +/* turn on/off EAPD according to Master switch (inversely!) for mute LED */ +static void cx_auto_vmaster_hook_mute_led(void *private_data, int enabled) +{ + struct hda_codec *codec = private_data; + struct conexant_spec *spec = codec->spec; + + snd_hda_codec_write(codec, spec->mute_led_eapd, 0, + AC_VERB_SET_EAPD_BTLENABLE, + enabled ? 0x00 : 0x02); +} + static int cx_auto_build_controls(struct hda_codec *codec) { int err; @@ -223,6 +235,7 @@ enum { CXT_FIXUP_TOSHIBA_P105, CXT_FIXUP_HP_530, CXT_FIXUP_CAP_MIX_AMP_5047, + CXT_FIXUP_MUTE_LED_EAPD, }; /* for hda_fixup_thinkpad_acpi() */ @@ -557,6 +570,18 @@ static void cxt_fixup_olpc_xo(struct hda_codec *codec, } } +static void cxt_fixup_mute_led_eapd(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct conexant_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_eapd = 0x1b; + spec->dynamic_eapd = 1; + spec->gen.vmaster_mute.hook = cx_auto_vmaster_hook_mute_led; + } +} + /* * Fix max input level on mixer widget to 0dB * (originally it has 0x2b steps with 0dB offset 0x14) @@ -705,6 +730,10 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_cap_mix_amp_5047, }, + [CXT_FIXUP_MUTE_LED_EAPD] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_mute_led_eapd, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -762,6 +791,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), @@ -780,6 +810,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_PINCFG_LEMOTE_A1004, .name = "lemote-a1004" }, { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" }, { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" }, + { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" }, {} }; From 9b0b26580a753d4d6bdd2b8b4ca9a8f3f2d39065 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 13 Nov 2014 12:22:01 +0000 Subject: [PATCH 103/138] arm64: efi: Fix stub cache maintenance While efi-entry.S mentions that efi_entry() will have relocated the kernel image, it actually means that efi_entry will have placed a copy of the kernel in the appropriate location, and until this is branched to at the end of efi_entry.S, all instructions are executed from the original image. Thus while the flush in efi_entry.S does ensure that the copy is visible to noncacheable accesses, it does not guarantee that this is true for the image instructions are being executed from. This could have disasterous effects when the MMU and caches are disabled if the image has not been naturally evicted to the PoC. Additionally, due to a missing dsb following the ic ialluis, the new kernel image is not necessarily clean in the I-cache when it is branched to, with similar potentially disasterous effects. This patch adds additional flushing to ensure that the currently executing stub text is flushed to the PoC and is thus visible to noncacheable accesses. As it is placed after the instructions cache maintenance for the new image and __flush_dcache_area already contains a dsb, we do not need to add a separate barrier to ensure completion of the icache maintenance. Comments are updated to clarify the situation with regard to the two images and the maintenance required for both. Fixes: 3c7f255039a2ad6ee1e3890505caf0d029b22e29 Signed-off-by: Mark Rutland Acked-by: Joel Schopp Reviewed-by: Roy Franz Tested-by: Tom Lendacky Cc: Ard Biesheuvel Cc: Ian Campbell Cc: Leif Lindholm Cc: Mark Salter Cc: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/efi-entry.S | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 619b1dd7bcdea7..d18a449409683c 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -54,18 +54,17 @@ ENTRY(efi_stub_entry) b.eq efi_load_fail /* - * efi_entry() will have relocated the kernel image if necessary - * and we return here with device tree address in x0 and the kernel - * entry point stored at *image_addr. Save those values in registers - * which are callee preserved. + * efi_entry() will have copied the kernel image if necessary and we + * return here with device tree address in x0 and the kernel entry + * point stored at *image_addr. Save those values in registers which + * are callee preserved. */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address mov x21, x0 /* - * Flush dcache covering current runtime addresses - * of kernel text/data. Then flush all of icache. + * Calculate size of the kernel Image (same for original and copy). */ adrp x1, _text add x1, x1, #:lo12:_text @@ -73,9 +72,24 @@ ENTRY(efi_stub_entry) add x2, x2, #:lo12:_edata sub x1, x2, x1 + /* + * Flush the copied Image to the PoC, and ensure it is not shadowed by + * stale icache entries from before relocation. + */ bl __flush_dcache_area ic ialluis + /* + * Ensure that the rest of this function (in the original Image) is + * visible when the caches are disabled. The I-cache can't have stale + * entries for the VA range of the current image, so no maintenance is + * necessary. + */ + adr x0, efi_stub_entry + adr x1, efi_stub_entry_end + sub x1, x1, x0 + bl __flush_dcache_area + /* Turn off Dcache and MMU */ mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 @@ -105,4 +119,5 @@ efi_load_fail: ldp x29, x30, [sp], #32 ret +efi_stub_entry_end: ENDPROC(efi_stub_entry) From 287e8c6a41749e5affd6a4faa6a6102ba1594406 Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Wed, 8 Oct 2014 16:45:59 +0100 Subject: [PATCH 104/138] arm64: Fix data type for physical address Use phys_addr_t for physical address in alloc_init_pud. Although phys_addr_t and unsigned long are 64 bit in arm64, it is better to use phys_addr_t to describe physical addresses. Signed-off-by: Min-Hua Chen Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0bf90d26e7455d..f4f8b500f74c63 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -202,7 +202,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys, + unsigned long end, phys_addr_t phys, int map_io) { pud_t *pud; From 97fc15436b36ee3956efad83e22a557991f7d19d Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 12 Nov 2014 21:07:44 +0000 Subject: [PATCH 105/138] arm64: __clear_user: handle exceptions on strb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ARM64 currently doesn't fix up faults on the single-byte (strb) case of __clear_user... which means that we can cause a nasty kernel panic as an ordinary user with any multiple PAGE_SIZE+1 read from /dev/zero. i.e.: dd if=/dev/zero of=foo ibs=1 count=1 (or ibs=65537, etc.) This is a pretty obscure bug in the general case since we'll only __do_kernel_fault (since there's no extable entry for pc) if the mmap_sem is contended. However, with CONFIG_DEBUG_VM enabled, we'll always fault. if (!down_read_trylock(&mm->mmap_sem)) { if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; retry: down_read(&mm->mmap_sem); } else { /* * The above down_read_trylock() might have succeeded in * which * case, we'll have missed the might_sleep() from * down_read(). */ might_sleep(); if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; } Fix that by adding an extable entry for the strb instruction, since it touches user memory, similar to the other stores in __clear_user. Signed-off-by: Kyle McMartin Reported-by: Miloš Prchlík Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/lib/clear_user.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 6e0ed93d51fe18..c17967fdf5f600 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -46,7 +46,7 @@ USER(9f, strh wzr, [x0], #2 ) sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f - strb wzr, [x0] +USER(9f, strb wzr, [x0] ) 5: mov x0, #0 ret ENDPROC(__clear_user) From 899d5933b2dd2720f2b20b01eaa07871aa6ad096 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Tue, 11 Nov 2014 09:41:27 -0500 Subject: [PATCH 106/138] Correct the race condition in aarch64_insn_patch_text_sync() When experimenting with patches to provide kprobes support for aarch64 smp machines would hang when inserting breakpoints into kernel code. The hangs were caused by a race condition in the code called by aarch64_insn_patch_text_sync(). The first processor in the aarch64_insn_patch_text_cb() function would patch the code while other processors were still entering the function and incrementing the cpu_count field. This resulted in some processors never observing the exit condition and exiting the function. Thus, processors in the system hung. The first processor to enter the patching function performs the patching and signals that the patching is complete with an increment of the cpu_count field. When all the processors have incremented the cpu_count field the cpu_count will be num_cpus_online()+1 and they will return to normal execution. Fixes: ae16480785de arm64: introduce interfaces to hotpatch kernel and module code Signed-off-by: William Cohen Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/kernel/insn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index e007714ded04e6..8cd27fedc8b6a1 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -163,9 +163,10 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) * which ends with "dsb; isb" pair guaranteeing global * visibility. */ - atomic_set(&pp->cpu_count, -1); + /* Notify other processors with an additional increment. */ + atomic_inc(&pp->cpu_count); } else { - while (atomic_read(&pp->cpu_count) != -1) + while (atomic_read(&pp->cpu_count) <= num_online_cpus()) cpu_relax(); isb(); } From 5fd6690c8fb2c3012012979dd8ce7492c69c5d86 Mon Sep 17 00:00:00 2001 From: Neil Zhang Date: Tue, 28 Oct 2014 05:44:01 +0000 Subject: [PATCH 107/138] arm64: ARCH_PFN_OFFSET should be unsigned long pfns are unsigned long, but PHYS_PFN_OFFSET is phys_addr_t. This leads to page_to_pfn() returning phys_addr_t which cause type mismatches in some print statements. Signed-off-by: Neil Zhang Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index ccc7087d3c4ea7..a62cd077457b93 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -142,7 +142,7 @@ static inline void *phys_to_virt(phys_addr_t x) * virt_to_page(k) convert a _valid_ virtual address to struct page * * virt_addr_valid(k) indicates whether a virtual address is valid */ -#define ARCH_PFN_OFFSET PHYS_PFN_OFFSET +#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) From ad0eab9293485d1c06237e9249f6d4dfa3d93d4d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 13 Nov 2014 20:15:23 +1100 Subject: [PATCH 108/138] Fix thinko in iov_iter_single_seg_count The branches of the if (i->type & ITER_BVEC) statement in iov_iter_single_seg_count() are the wrong way around; if ITER_BVEC is clear then we use i->bvec, when we should be using i->iov. This fixes it. In my case, the symptom that this caused was that a KVM guest doing filesystem operations on a virtual disk would result in one of qemu's threads on the host going into an infinite loop in generic_perform_write(). The loop would hit the copied == 0 case and call iov_iter_single_seg_count() to reduce the number of bytes to try to process, but because of the error, iov_iter_single_seg_count() would just return i->count and the loop made no progress and continued forever. Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Paul Mackerras Signed-off-by: Al Viro --- mm/iov_iter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/iov_iter.c b/mm/iov_iter.c index eafcf60f6b832b..e34a3cb6aad6cb 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -911,9 +911,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) if (i->nr_segs == 1) return i->count; else if (i->type & ITER_BVEC) - return min(i->count, i->iov->iov_len - i->iov_offset); - else return min(i->count, i->bvec->bv_len - i->iov_offset); + else + return min(i->count, i->iov->iov_len - i->iov_offset); } EXPORT_SYMBOL(iov_iter_single_seg_count); From 3231300bb986947a6b74e7075d84a2f434e4d788 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 22 Oct 2014 17:13:26 -0700 Subject: [PATCH 109/138] ceph: fix flush tid comparision TID of cap flush ack is 64 bits, but ceph_inode_info::flushing_cap_tid is only 16 bits. 16 bits should be plenty to let the cap flush updates pipeline appropriately, but we need to cast in the proper direction when comparing these differently-sized versions. So downcast the 64-bits one to 16 bits. Reflects ceph.git commit a5184cf46a6e867287e24aeb731634828467cd98. Signed-off-by: Yan, Zheng Reviewed-by: Ilya Dryomov --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 659f2ea9e6f747..cefca661464b91 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, for (i = 0; i < CEPH_CAP_BITS; i++) if ((dirty & (1 << i)) && - flush_tid == ci->i_cap_flush_tid[i]) + (u16)flush_tid == ci->i_cap_flush_tid[i]) cleaned |= 1 << i; dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," From aaef31703a0cf6a733e651885bfb49edc3ac6774 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 23 Oct 2014 00:25:22 +0400 Subject: [PATCH 110/138] libceph: do not crash on large auth tickets Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth tickets will have their buffers vmalloc'ed, which leads to the following crash in crypto: [ 28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0 [ 28.686032] IP: [] scatterwalk_pagedone+0x22/0x80 [ 28.686032] PGD 0 [ 28.688088] Oops: 0000 [#1] PREEMPT SMP [ 28.688088] Modules linked in: [ 28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305 [ 28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 28.688088] Workqueue: ceph-msgr con_work [ 28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000 [ 28.688088] RIP: 0010:[] [] scatterwalk_pagedone+0x22/0x80 [ 28.688088] RSP: 0018:ffff8800d903f688 EFLAGS: 00010286 [ 28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0 [ 28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750 [ 28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880 [ 28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010 [ 28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000 [ 28.688088] FS: 00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000 [ 28.688088] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0 [ 28.688088] Stack: [ 28.688088] ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32 [ 28.688088] ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020 [ 28.688088] ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010 [ 28.688088] Call Trace: [ 28.688088] [] scatterwalk_done+0x38/0x40 [ 28.688088] [] scatterwalk_done+0x38/0x40 [ 28.688088] [] blkcipher_walk_done+0x182/0x220 [ 28.688088] [] crypto_cbc_encrypt+0x15f/0x180 [ 28.688088] [] ? crypto_aes_set_key+0x30/0x30 [ 28.688088] [] ceph_aes_encrypt2+0x29c/0x2e0 [ 28.688088] [] ceph_encrypt2+0x93/0xb0 [ 28.688088] [] ceph_x_encrypt+0x4a/0x60 [ 28.688088] [] ? ceph_buffer_new+0x5d/0xf0 [ 28.688088] [] ceph_x_build_authorizer.isra.6+0x297/0x360 [ 28.688088] [] ? kmem_cache_alloc_trace+0x11b/0x1c0 [ 28.688088] [] ? ceph_auth_create_authorizer+0x36/0x80 [ 28.688088] [] ceph_x_create_authorizer+0x63/0xd0 [ 28.688088] [] ceph_auth_create_authorizer+0x54/0x80 [ 28.688088] [] get_authorizer+0x80/0xd0 [ 28.688088] [] prepare_write_connect+0x18b/0x2b0 [ 28.688088] [] try_read+0x1e59/0x1f10 This is because we set up crypto scatterlists as if all buffers were kmalloc'ed. Fix it. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/crypto.c | 169 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 132 insertions(+), 37 deletions(-) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 62fc5e7a9acf75..790fe89d90c0ac 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -90,11 +90,82 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) static const u8 *aes_iv = (u8 *)CEPH_AES_IV; +/* + * Should be used for buffers allocated with ceph_kvmalloc(). + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt + * in-buffer (msg front). + * + * Dispose of @sgt with teardown_sgtable(). + * + * @prealloc_sg is to avoid memory allocation inside sg_alloc_table() + * in cases where a single sg is sufficient. No attempt to reduce the + * number of sgs by squeezing physically contiguous pages together is + * made though, for simplicity. + */ +static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, + const void *buf, unsigned int buf_len) +{ + struct scatterlist *sg; + const bool is_vmalloc = is_vmalloc_addr(buf); + unsigned int off = offset_in_page(buf); + unsigned int chunk_cnt = 1; + unsigned int chunk_len = PAGE_ALIGN(off + buf_len); + int i; + int ret; + + if (buf_len == 0) { + memset(sgt, 0, sizeof(*sgt)); + return -EINVAL; + } + + if (is_vmalloc) { + chunk_cnt = chunk_len >> PAGE_SHIFT; + chunk_len = PAGE_SIZE; + } + + if (chunk_cnt > 1) { + ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS); + if (ret) + return ret; + } else { + WARN_ON(chunk_cnt != 1); + sg_init_table(prealloc_sg, 1); + sgt->sgl = prealloc_sg; + sgt->nents = sgt->orig_nents = 1; + } + + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) { + struct page *page; + unsigned int len = min(chunk_len - off, buf_len); + + if (is_vmalloc) + page = vmalloc_to_page(buf); + else + page = virt_to_page(buf); + + sg_set_page(sg, page, len, off); + + off = 0; + buf += len; + buf_len -= len; + } + WARN_ON(buf_len != 0); + + return 0; +} + +static void teardown_sgtable(struct sg_table *sgt) +{ + if (sgt->orig_nents > 1) + sg_free_table(sgt); +} + static int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[2], sg_out[1]; + struct scatterlist sg_in[2], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -110,16 +181,18 @@ static int ceph_aes_encrypt(const void *key, int key_len, *dst_len = src_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 2); sg_set_buf(&sg_in[0], src, src_len); sg_set_buf(&sg_in[1], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -128,16 +201,22 @@ static int ceph_aes_encrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, @@ -145,7 +224,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, const void *src1, size_t src1_len, const void *src2, size_t src2_len) { - struct scatterlist sg_in[3], sg_out[1]; + struct scatterlist sg_in[3], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -161,17 +241,19 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, *dst_len = src1_len + src2_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 3); sg_set_buf(&sg_in[0], src1, src1_len); sg_set_buf(&sg_in[1], src2, src2_len); sg_set_buf(&sg_in[2], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -182,23 +264,30 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src1_len + src2_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt2 failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[2]; + struct sg_table sg_in; + struct scatterlist sg_out[2], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -210,16 +299,16 @@ static int ceph_aes_decrypt(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - crypto_blkcipher_setkey((void *)tfm, key, key_len); - sg_init_table(sg_in, 1); sg_init_table(sg_out, 2); - sg_set_buf(sg_in, src, src_len); sg_set_buf(&sg_out[0], dst, *dst_len); sg_set_buf(&sg_out[1], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -228,12 +317,10 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst_len) @@ -251,7 +338,12 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt2(const void *key, int key_len, @@ -259,7 +351,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len, void *dst2, size_t *dst2_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[3]; + struct sg_table sg_in; + struct scatterlist sg_out[3], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -271,17 +364,17 @@ static int ceph_aes_decrypt2(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - sg_init_table(sg_in, 1); - sg_set_buf(sg_in, src, src_len); sg_init_table(sg_out, 3); sg_set_buf(&sg_out[0], dst1, *dst1_len); sg_set_buf(&sg_out[1], dst2, *dst2_len); sg_set_buf(&sg_out[2], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -290,12 +383,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst1_len) @@ -325,7 +416,11 @@ static int ceph_aes_decrypt2(const void *key, int key_len, dst2, *dst2_len, 1); */ - return 0; +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } From a390de0208e7f2f8fdb2fbf970240e4f7b308037 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 4 Nov 2014 18:32:14 +0300 Subject: [PATCH 111/138] libceph: unlink from o_linger_requests when clearing r_osd Requests have to be unlinked from both osd->o_requests (normal requests) and osd->o_linger_requests (linger requests) lists when clearing req->r_osd. Otherwise __unregister_linger_request() gets confused and we trip over a !list_empty(&osd->o_linger_requests) assert in __remove_osd(). MON=1 OSD=1: # cat remove-osd.sh #!/bin/bash rbd create --size 1 test DEV=$(rbd map test) ceph osd out 0 sleep 3 rbd map dne/dne # obtain a new osdmap as a side effect rbd unmap $DEV & # will block sleep 3 ceph osd in 0 Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f3fc54eac09d32..75abaa87abaca1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1395,6 +1395,7 @@ static int __map_request(struct ceph_osd_client *osdc, if (req->r_osd) { __cancel_request(req); list_del_init(&req->r_osd_item); + list_del_init(&req->r_linger_osd_item); req->r_osd = NULL; } From ba9d114ec5578e6e99a4dfa37ff8ae688040fd64 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 5 Nov 2014 15:45:58 +0300 Subject: [PATCH 112/138] libceph: clear r_req_lru_item in __unregister_linger_request() kick_requests() can put linger requests on the notarget list. This means we need to clear the much-overloaded req->r_req_lru_item in __unregister_linger_request() as well, or we get an assertion failure in ceph_osdc_release_request() - !list_empty(&req->r_req_lru_item). AFAICT the assumption was that registered linger requests cannot be on any of req->r_req_lru_item lists, but that's clearly not the case. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 75abaa87abaca1..decc3b74e65f45 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1254,6 +1254,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } + + list_del_init(&req->r_req_lru_item); /* can be on notarget */ ceph_osdc_put_request(req); } From cc9f1f518cec079289d11d732efa490306b1ddad Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 5 Nov 2014 19:33:44 +0300 Subject: [PATCH 113/138] libceph: change from BUG to WARN for __remove_osd() asserts No reason to use BUG_ON for osd request list assertions. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index decc3b74e65f45..6f164289bde886 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1007,8 +1007,8 @@ static void put_osd(struct ceph_osd *osd) static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { dout("__remove_osd %p\n", osd); - BUG_ON(!list_empty(&osd->o_requests)); - BUG_ON(!list_empty(&osd->o_linger_requests)); + WARN_ON(!list_empty(&osd->o_requests)); + WARN_ON(!list_empty(&osd->o_linger_requests)); rb_erase(&osd->o_node, &osdc->osds); list_del_init(&osd->o_osd_lru); From 242bcd5ba1dcea802c0ad03344f626a727212399 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Thu, 13 Nov 2014 05:26:19 +0400 Subject: [PATCH 114/138] net/smsc911x: Fix rare soft reset timeout issue due to PHY power-down mode The patch affect SMSC LAN generation 4 chips with integrated PHY (LAN9221). It is possible that PHY could enter power-down mode (ENERGYON clear), between ENERGYON bit check in smsc911x_phy_disable_energy_detect and SRST bit set in smsc911x_soft_reset. This could happen, for example, if someone disconnect ethernet cable between the checks. The PHY in a power-down mode would prevent the MAC portion of chip to be software reseted. Initially found by code review, confirmed later using test case. This is low probability issue, and in order to reproduce it you have to run the script: while true; do ifconfig eth0 down ifconfig eth0 up || break done While the script is running you have to plug/unplug ethernet cable many times (using gpio controlled ethernet switch, for example) until get: [ 4516.477783] ADDRCONF(NETDEV_UP): eth0: link is not ready [ 4516.512207] smsc911x smsc911x.0: eth0: SMSC911x/921x identified at 0xce006000, IRQ: 336 [ 4516.524658] ADDRCONF(NETDEV_UP): eth0: link is not ready [ 4516.559082] smsc911x smsc911x.0: eth0: SMSC911x/921x identified at 0xce006000, IRQ: 336 [ 4516.571990] ADDRCONF(NETDEV_UP): eth0: link is not ready ifconfig: SIOCSIFFLAGS: Input/output error The patch was reviewed by Steve Glendinning and Microchip Team. Signed-off-by: Alexander Kochetkov Acked-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index affb29da353e29..1e1f6194cb3714 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1356,12 +1356,8 @@ static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata) return rc; } - /* - * If energy is detected the PHY is already awake so is not necessary - * to disable the energy detect power-down mode. - */ - if ((rc & MII_LAN83C185_EDPWRDOWN) && - !(rc & MII_LAN83C185_ENERGYON)) { + /* Only disable if energy detect mode is already enabled */ + if (rc & MII_LAN83C185_EDPWRDOWN) { /* Disable energy detect mode for this SMSC Transceivers */ rc = phy_write(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS, rc & (~MII_LAN83C185_EDPWRDOWN)); From 6ff53fd37175e35dc4f70b0e8f48b28338fbee29 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Thu, 13 Nov 2014 05:26:20 +0400 Subject: [PATCH 115/138] net/smsc911x: Fix delays in the PHY enable/disable routines Increased delay in the smsc911x_phy_disable_energy_detect (from 1ms to 2ms). Dropped delays in the smsc911x_phy_enable_energy_detect (100ms and 1ms). The patch affect SMSC LAN generation 4 chips with integrated PHY (LAN9221). I saw problems with soft reset due to wrong udelay timings. After I fixed udelay, I measured the time needed to bring integrated PHY from power-down to operational mode (the time beetween clearing EDPWRDOWN bit and soft reset complete event). I got 1ms (measured using ktime_get). The value is equal to the current value (1ms) used in the smsc911x_phy_disable_energy_detect. It is near the upper bound and in order to avoid rare soft reset faults it is doubled (2ms). I don't know official timing for bringing up integrated PHY as specs doesn't clarify this (or may be I didn't found). It looks safe to drop delays before and after setting EDPWRDOWN bit (enable PHY power-down mode). I didn't saw any regressions with the patch. The patch was reviewed by Steve Glendinning and Microchip Team. Signed-off-by: Alexander Kochetkov Acked-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 1e1f6194cb3714..c3bf17f89b0574 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1366,8 +1366,8 @@ static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata) SMSC_WARN(pdata, drv, "Failed writing PHY control reg"); return rc; } - - mdelay(1); + /* Allow PHY to wakeup */ + mdelay(2); } return 0; @@ -1389,7 +1389,6 @@ static int smsc911x_phy_enable_energy_detect(struct smsc911x_data *pdata) /* Only enable if energy detect mode is already disabled */ if (!(rc & MII_LAN83C185_EDPWRDOWN)) { - mdelay(100); /* Enable energy detect mode for this SMSC Transceivers */ rc = phy_write(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS, rc | MII_LAN83C185_EDPWRDOWN); @@ -1398,8 +1397,6 @@ static int smsc911x_phy_enable_energy_detect(struct smsc911x_data *pdata) SMSC_WARN(pdata, drv, "Failed writing PHY control reg"); return rc; } - - mdelay(1); } return 0; } From 0c828f2f8395fb5e7faf0a116e476a3ce992a199 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 13 Nov 2014 13:10:48 +0800 Subject: [PATCH 116/138] lib: rhashtable - Remove weird non-ASCII characters from comments My editor spewed garbage that looked like memory corruption on my screen. It turns out that a number of occurences of "fi" got turned into a ligature. This patch replaces these ligatures with the ASCII letters "fi". Signed-off-by: Herbert Xu Cheers, Acked-by: Thomas Graf Signed-off-by: David S. Miller --- lib/rhashtable.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 081be3ba9ea828..624a0b7c05ef15 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -230,7 +230,7 @@ int rhashtable_expand(struct rhashtable *ht, gfp_t flags) ht->shift++; /* For each new bucket, search the corresponding old bucket - * for the first entry that hashes to the new bucket, and + * for the first entry that hashes to the new bucket, and * link the new bucket to that entry. Since all the entries * which will end up in the new bucket appear in the same * old bucket, this constructs an entirely valid new hash @@ -248,8 +248,8 @@ int rhashtable_expand(struct rhashtable *ht, gfp_t flags) } /* Publish the new table pointer. Lookups may now traverse - * the new table, but they will not benefit from any - * additional efficiency until later steps unzip the buckets. + * the new table, but they will not benefit from any + * additional efficiency until later steps unzip the buckets. */ rcu_assign_pointer(ht->tbl, new_tbl); @@ -306,14 +306,14 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) ht->shift--; - /* Link each bucket in the new table to the first bucket + /* Link each bucket in the new table to the first bucket * in the old table that contains entries which will hash * to the new bucket. */ for (i = 0; i < ntbl->size; i++) { ntbl->buckets[i] = tbl->buckets[i]; - /* Link each bucket in the new table to the first bucket + /* Link each bucket in the new table to the first bucket * in the old table that contains entries which will hash * to the new bucket. */ From ccf899a27c08038db91765ff12bb0380dcd85887 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Thu, 13 Nov 2014 09:14:34 +0100 Subject: [PATCH 117/138] smsc911x: power-up phydev before doing a software reset. With commit be9dad1f9f26604fb ("net: phy: suspend phydev when going to HALTED"), the PHY device will be put in a low-power mode using BMCR_PDOWN if the the interface is set down. The smsc911x driver does a software_reset opening the device driver (ndo_open). In such case, the PHY must be powered-up before access to any register and before calling the software_reset function. Otherwise, as the PHY is powered down the software reset fails and the interface can not be enabled again. This patch fixes this scenario that is easy to reproduce setting down the network interface and setting up again. $ ifconfig eth0 down $ ifconfig eth0 up ifconfig: SIOCSIFFLAGS: Input/output error Signed-off-by: Enric Balletbo i Serra Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index c3bf17f89b0574..77ed74561e5fe8 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1342,6 +1342,42 @@ static void smsc911x_rx_multicast_update_workaround(struct smsc911x_data *pdata) spin_unlock(&pdata->mac_lock); } +static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata) +{ + int rc = 0; + + if (!pdata->phy_dev) + return rc; + + /* If the internal PHY is in General Power-Down mode, all, except the + * management interface, is powered-down and stays in that condition as + * long as Phy register bit 0.11 is HIGH. + * + * In that case, clear the bit 0.11, so the PHY powers up and we can + * access to the phy registers. + */ + rc = phy_read(pdata->phy_dev, MII_BMCR); + if (rc < 0) { + SMSC_WARN(pdata, drv, "Failed reading PHY control reg"); + return rc; + } + + /* If the PHY general power-down bit is not set is not necessary to + * disable the general power down-mode. + */ + if (rc & BMCR_PDOWN) { + rc = phy_write(pdata->phy_dev, MII_BMCR, rc & ~BMCR_PDOWN); + if (rc < 0) { + SMSC_WARN(pdata, drv, "Failed writing PHY control reg"); + return rc; + } + + usleep_range(1000, 1500); + } + + return 0; +} + static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata) { int rc = 0; @@ -1407,6 +1443,16 @@ static int smsc911x_soft_reset(struct smsc911x_data *pdata) unsigned int temp; int ret; + /* + * Make sure to power-up the PHY chip before doing a reset, otherwise + * the reset fails. + */ + ret = smsc911x_phy_general_power_up(pdata); + if (ret) { + SMSC_WARN(pdata, drv, "Failed to power-up the PHY chip"); + return ret; + } + /* * LAN9210/LAN9211/LAN9220/LAN9221 chips have an internal PHY that * are initialized in a Energy Detect Power-Down mode that prevents From 19ca9fc1445b76b60d34148f7ff837b055f5dcf3 Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Thu, 13 Nov 2014 14:43:08 -0200 Subject: [PATCH 118/138] vxlan: Do not reuse sockets for a different address family Currently, we only match against local port number in order to reuse socket. But if this new vxlan wants an IPv6 socket and a IPv4 one bound to that port, vxlan will reuse an IPv4 socket as IPv6 and a panic will follow. The following steps reproduce it: # ip link add vxlan6 type vxlan id 42 group 229.10.10.10 \ srcport 5000 6000 dev eth0 # ip link add vxlan7 type vxlan id 43 group ff0e::110 \ srcport 5000 6000 dev eth0 # ip link set vxlan6 up # ip link set vxlan7 up [ 4.187481] BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 ... [ 4.188076] Call Trace: [ 4.188085] [] ? ipv6_sock_mc_join+0x3a/0x630 [ 4.188098] [] vxlan_igmp_join+0x66/0xd0 [vxlan] [ 4.188113] [] process_one_work+0x220/0x710 [ 4.188125] [] ? process_one_work+0x1b4/0x710 [ 4.188138] [] worker_thread+0x11b/0x3a0 [ 4.188149] [] ? process_one_work+0x710/0x710 So address family must also match in order to reuse a socket. Reported-by: Jean-Tsung Hsiao Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index cfb892b265e8b6..fa9dc45b75a6f9 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -275,13 +275,15 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); } -/* Find VXLAN socket based on network namespace and UDP port */ -static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) +/* Find VXLAN socket based on network namespace, address family and UDP port */ +static struct vxlan_sock *vxlan_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct vxlan_sock *vs; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { - if (inet_sk(vs->sock->sk)->inet_sport == port) + if (inet_sk(vs->sock->sk)->inet_sport == port && + inet_sk(vs->sock->sk)->sk.sk_family == family) return vs; } return NULL; @@ -300,11 +302,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) } /* Look up VNI in a per net namespace table */ -static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port) +static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, + sa_family_t family, __be16 port) { struct vxlan_sock *vs; - vs = vxlan_find_sock(net, port); + vs = vxlan_find_sock(net, family, port); if (!vs) return NULL; @@ -1773,7 +1776,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; ip_rt_put(rt); - dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port); + dst_vxlan = vxlan_find_vni(vxlan->net, vni, + dst->sa.sa_family, dst_port); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1827,7 +1831,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; dst_release(ndst); - dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port); + dst_vxlan = vxlan_find_vni(vxlan->net, vni, + dst->sa.sa_family, dst_port); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1987,13 +1992,15 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_sock *vs; + bool ipv6 = vxlan->flags & VXLAN_F_IPV6; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(vxlan->net, vxlan->dst_port); + vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, + vxlan->dst_port); if (vs) { /* If we have a socket with same port already, reuse it */ atomic_inc(&vs->refcnt); @@ -2384,6 +2391,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; + bool ipv6 = flags & VXLAN_F_IPV6; vs = vxlan_socket_create(net, port, rcv, data, flags); if (!IS_ERR(vs)) @@ -2393,7 +2401,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, return vs; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(net, port); + vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); if (vs) { if (vs->rcv == rcv) atomic_inc(&vs->refcnt); @@ -2552,7 +2560,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; - if (vxlan_find_vni(net, vni, vxlan->dst_port)) { + if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, + vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; } From c406515239376fc93a30d5d03192182160cbd3fb Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Thu, 13 Nov 2014 15:19:05 -0800 Subject: [PATCH 119/138] zram: avoid kunmap_atomic() of a NULL pointer zram could kunmap_atomic() a NULL pointer in a rare situation: a zram page becomes a full-zeroed page after a partial write io. The current code doesn't handle this case and performs kunmap_atomic() on a NULL pointer, which panics the kernel. This patch fixes this issue. Signed-off-by: Weijie Yang Cc: Sergey Senozhatsky Cc: Dan Streetman Cc: Nitin Gupta Cc: Weijie Yang Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2ad0b5bce44be8..3920ee45aa5942 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -560,7 +560,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (page_zero_filled(uncmem)) { - kunmap_atomic(user_mem); + if (user_mem) + kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); From 58420016303769f74c58248a59ca0f435041b352 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:07 -0800 Subject: [PATCH 120/138] mm/compaction: skip the range until proper target pageblock is met Commit 7d49d8868336 ("mm, compaction: reduce zone checking frequency in the migration scanner") has a side-effect that changes the iteration range calculation. Before the change, block_end_pfn is calculated using start_pfn, but now it blindly adds pageblock_nr_pages to the previous value. This causes the problem that isolation_start_pfn is larger than block_end_pfn when we isolate the page with more than pageblock order. In this case, isolation would fail due to an invalid range parameter. To prevent this, this patch implements skipping the range until a proper target pageblock is met. Without this patch, CMA with more than pageblock order always fails but with this patch it will succeed. Signed-off-by: Joonsoo Kim Cc: Vlastimil Babka Cc: Minchan Kim Cc: Michal Nazarewicz Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/compaction.c b/mm/compaction.c index ec74cf0123efd3..4f0151cfd2387e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc, block_end_pfn = min(block_end_pfn, end_pfn); + /* + * pfn could pass the block_end_pfn if isolated freepage + * is more than pageblock order. In this case, we adjust + * scanning range to right one. + */ + if (pfn >= block_end_pfn) { + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = min(block_end_pfn, end_pfn); + } + if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) break; From ad53f92eb416d81e469fa8ea57153e59455e7175 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:11 -0800 Subject: [PATCH 121/138] mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype Before describing bugs itself, I first explain definition of freepage. 1. pages on buddy list are counted as freepage. 2. pages on isolate migratetype buddy list are *not* counted as freepage. 3. pages on cma buddy list are counted as CMA freepage, too. Now, I describe problems and related patch. Patch 1: There is race conditions on getting pageblock migratetype that it results in misplacement of freepages on buddy list, incorrect freepage count and un-availability of freepage. Patch 2: Freepages on pcp list could have stale cached information to determine migratetype of buddy list to go. This causes misplacement of freepages on buddy list and incorrect freepage count. Patch 4: Merging between freepages on different migratetype of pageblocks will cause freepages accouting problem. This patch fixes it. Without patchset [3], above problem doesn't happens on my CMA allocation test, because CMA reserved pages aren't used at all. So there is no chance for above race. With patchset [3], I did simple CMA allocation test and get below result: - Virtual machine, 4 cpus, 1024 MB memory, 256 MB CMA reservation - run kernel build (make -j16) on background - 30 times CMA allocation(8MB * 30 = 240MB) attempts in 5 sec interval - Result: more than 5000 freepage count are missed With patchset [3] and this patchset, I found that no freepage count are missed so that I conclude that problems are solved. On my simple memory offlining test, these problems also occur on that environment, too. This patch (of 4): There are two paths to reach core free function of buddy allocator, __free_one_page(), one is free_one_page()->__free_one_page() and the other is free_hot_cold_page()->free_pcppages_bulk()->__free_one_page(). Each paths has race condition causing serious problems. At first, this patch is focused on first type of freepath. And then, following patch will solve the problem in second type of freepath. In the first type of freepath, we got migratetype of freeing page without holding the zone lock, so it could be racy. There are two cases of this race. 1. pages are added to isolate buddy list after restoring orignal migratetype CPU1 CPU2 get migratetype => return MIGRATE_ISOLATE call free_one_page() with MIGRATE_ISOLATE grab the zone lock unisolate pageblock release the zone lock grab the zone lock call __free_one_page() with MIGRATE_ISOLATE freepage go into isolate buddy list, although pageblock is already unisolated This may cause two problems. One is that we can't use this page anymore until next isolation attempt of this pageblock, because freepage is on isolate buddy list. The other is that freepage accouting could be wrong due to merging between different buddy list. Freepages on isolate buddy list aren't counted as freepage, but ones on normal buddy list are counted as freepage. If merge happens, buddy freepage on normal buddy list is inevitably moved to isolate buddy list without any consideration of freepage accouting so it could be incorrect. 2. pages are added to normal buddy list while pageblock is isolated. It is similar with above case. This also may cause two problems. One is that we can't keep these freepages from being allocated. Although this pageblock is isolated, freepage would be added to normal buddy list so that it could be allocated without any restriction. And the other problem is same as case 1, that it, incorrect freepage accouting. This race condition would be prevented by checking migratetype again with holding the zone lock. Because it is somewhat heavy operation and it isn't needed in common case, we want to avoid rechecking as much as possible. So this patch introduce new variable, nr_isolate_pageblock in struct zone to check if there is isolated pageblock. With this, we can avoid to re-check migratetype in common case and do it only if there is isolated pageblock or migratetype is MIGRATE_ISOLATE. This solve above mentioned problems. Changes from v3: Add one more check in free_one_page() that checks whether migratetype is MIGRATE_ISOLATE or not. Without this, abovementioned case 1 could happens. Signed-off-by: Joonsoo Kim Acked-by: Minchan Kim Acked-by: Michal Nazarewicz Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Johannes Weiner Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Tang Chen Cc: Naoya Horiguchi Cc: Bartlomiej Zolnierkiewicz Cc: Wen Congyang Cc: Marek Szyprowski Cc: Laura Abbott Cc: Heesub Shin Cc: "Aneesh Kumar K.V" Cc: Ritesh Harjani Cc: Gioh Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 9 +++++++++ include/linux/page-isolation.h | 8 ++++++++ mm/page_alloc.c | 11 +++++++++-- mm/page_isolation.c | 2 ++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 48bf12ef6620cc..ffe66e381c0423 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -431,6 +431,15 @@ struct zone { */ int nr_migrate_reserve_block; +#ifdef CONFIG_MEMORY_ISOLATION + /* + * Number of isolated pageblock. It is used to solve incorrect + * freepage counting problem due to racy retrieving migratetype + * of pageblock. Protected by zone->lock. + */ + unsigned long nr_isolate_pageblock; +#endif + #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 3fff8e77406790..2dc1e1697b451c 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -2,6 +2,10 @@ #define __LINUX_PAGEISOLATION_H #ifdef CONFIG_MEMORY_ISOLATION +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return zone->nr_isolate_pageblock; +} static inline bool is_migrate_isolate_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; @@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype) return migratetype == MIGRATE_ISOLATE; } #else +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return false; +} static inline bool is_migrate_isolate_page(struct page *page) { return false; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9cd36b82244443..df1da25b309b12 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -739,9 +739,16 @@ static void free_one_page(struct zone *zone, if (nr_scanned) __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); + if (unlikely(has_isolate_pageblock(zone) || + is_migrate_isolate(migratetype))) { + migratetype = get_pfnblock_migratetype(page, pfn); + if (is_migrate_isolate(migratetype)) + goto skip_counting; + } + __mod_zone_freepage_state(zone, 1 << order, migratetype); + +skip_counting: __free_one_page(page, pfn, zone, order, migratetype); - if (unlikely(!is_migrate_isolate(migratetype))) - __mod_zone_freepage_state(zone, 1 << order, migratetype); spin_unlock(&zone->lock); } diff --git a/mm/page_isolation.c b/mm/page_isolation.c index d1473b2e948173..1fa4a4d72ecc47 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -60,6 +60,7 @@ int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages) int migratetype = get_pageblock_migratetype(page); set_pageblock_migratetype(page, MIGRATE_ISOLATE); + zone->nr_isolate_pageblock++; nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); __mod_zone_freepage_state(zone, -nr_pages, migratetype); @@ -83,6 +84,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) nr_pages = move_freepages_block(zone, page, migratetype); __mod_zone_freepage_state(zone, nr_pages, migratetype); set_pageblock_migratetype(page, migratetype); + zone->nr_isolate_pageblock--; out: spin_unlock_irqrestore(&zone->lock, flags); } From 51bb1a4093cc68bc16b282548d9cee6104be0ef1 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:14 -0800 Subject: [PATCH 122/138] mm/page_alloc: add freepage on isolate pageblock to correct buddy list In free_pcppages_bulk(), we use cached migratetype of freepage to determine type of buddy list where freepage will be added. This information is stored when freepage is added to pcp list, so if isolation of pageblock of this freepage begins after storing, this cached information could be stale. In other words, it has original migratetype rather than MIGRATE_ISOLATE. There are two problems caused by this stale information. One is that we can't keep these freepages from being allocated. Although this pageblock is isolated, freepage will be added to normal buddy list so that it could be allocated without any restriction. And the other problem is incorrect freepage accounting. Freepages on isolate pageblock should not be counted for number of freepage. Following is the code snippet in free_pcppages_bulk(). /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); if (likely(!is_migrate_isolate_page(page))) { __mod_zone_page_state(zone, NR_FREE_PAGES, 1); if (is_migrate_cma(mt)) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); } As you can see above snippet, current code already handle second problem, incorrect freepage accounting, by re-fetching pageblock migratetype through is_migrate_isolate_page(page). But, because this re-fetched information isn't used for __free_one_page(), first problem would not be solved. This patch try to solve this situation to re-fetch pageblock migratetype before __free_one_page() and to use it for __free_one_page(). In addition to move up position of this re-fetch, this patch use optimization technique, re-fetching migratetype only if there is isolate pageblock. Pageblock isolation is rare event, so we can avoid re-fetching in common case with this optimization. This patch also correct migratetype of the tracepoint output. Signed-off-by: Joonsoo Kim Acked-by: Minchan Kim Acked-by: Michal Nazarewicz Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Johannes Weiner Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Tang Chen Cc: Naoya Horiguchi Cc: Bartlomiej Zolnierkiewicz Cc: Wen Congyang Cc: Marek Szyprowski Cc: Laura Abbott Cc: Heesub Shin Cc: "Aneesh Kumar K.V" Cc: Ritesh Harjani Cc: Gioh Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index df1da25b309b12..58923bea0d8b51 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -715,14 +715,17 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* must delete as __free_one_page list manipulates */ list_del(&page->lru); mt = get_freepage_migratetype(page); + if (unlikely(has_isolate_pageblock(zone))) { + mt = get_pageblock_migratetype(page); + if (is_migrate_isolate(mt)) + goto skip_counting; + } + __mod_zone_freepage_state(zone, 1, mt); + +skip_counting: /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); - if (likely(!is_migrate_isolate_page(page))) { - __mod_zone_page_state(zone, NR_FREE_PAGES, 1); - if (is_migrate_cma(mt)) - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); - } } while (--to_free && --batch_free && !list_empty(list)); } spin_unlock(&zone->lock); From 8f82b55dd558a74fc33d69a1f2c2605d0cd2c908 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:18 -0800 Subject: [PATCH 123/138] mm/page_alloc: move freepage counting logic to __free_one_page() All the caller of __free_one_page() has similar freepage counting logic, so we can move it to __free_one_page(). This reduce line of code and help future maintenance. This is also preparation step for "mm/page_alloc: restrict max order of merging on isolated pageblock" which fix the freepage counting problem on freepage with more than pageblock order. Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Johannes Weiner Cc: Minchan Kim Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Tang Chen Cc: Naoya Horiguchi Cc: Bartlomiej Zolnierkiewicz Cc: Wen Congyang Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Laura Abbott Cc: Heesub Shin Cc: "Aneesh Kumar K.V" Cc: Ritesh Harjani Cc: Gioh Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 58923bea0d8b51..9f689f16b5aaac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -577,6 +577,8 @@ static inline void __free_one_page(struct page *page, return; VM_BUG_ON(migratetype == -1); + if (!is_migrate_isolate(migratetype)) + __mod_zone_freepage_state(zone, 1 << order, migratetype); page_idx = pfn & ((1 << MAX_ORDER) - 1); @@ -715,14 +717,9 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* must delete as __free_one_page list manipulates */ list_del(&page->lru); mt = get_freepage_migratetype(page); - if (unlikely(has_isolate_pageblock(zone))) { + if (unlikely(has_isolate_pageblock(zone))) mt = get_pageblock_migratetype(page); - if (is_migrate_isolate(mt)) - goto skip_counting; - } - __mod_zone_freepage_state(zone, 1, mt); -skip_counting: /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); @@ -745,12 +742,7 @@ static void free_one_page(struct zone *zone, if (unlikely(has_isolate_pageblock(zone) || is_migrate_isolate(migratetype))) { migratetype = get_pfnblock_migratetype(page, pfn); - if (is_migrate_isolate(migratetype)) - goto skip_counting; } - __mod_zone_freepage_state(zone, 1 << order, migratetype); - -skip_counting: __free_one_page(page, pfn, zone, order, migratetype); spin_unlock(&zone->lock); } From 3c605096d3158216ba9326a16266f6ba128c2c8d Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:21 -0800 Subject: [PATCH 124/138] mm/page_alloc: restrict max order of merging on isolated pageblock Current pageblock isolation logic could isolate each pageblock individually. This causes freepage accounting problem if freepage with pageblock order on isolate pageblock is merged with other freepage on normal pageblock. We can prevent merging by restricting max order of merging to pageblock order if freepage is on isolate pageblock. A side-effect of this change is that there could be non-merged buddy freepage even if finishing pageblock isolation, because undoing pageblock isolation is just to move freepage from isolate buddy list to normal buddy list rather than to consider merging. So, the patch also makes undoing pageblock isolation consider freepage merge. When un-isolation, freepage with more than pageblock order and it's buddy are checked. If they are on normal pageblock, instead of just moving, we isolate the freepage and free it in order to get merged. Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Johannes Weiner Cc: Minchan Kim Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Tang Chen Cc: Naoya Horiguchi Cc: Bartlomiej Zolnierkiewicz Cc: Wen Congyang Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Laura Abbott Cc: Heesub Shin Cc: "Aneesh Kumar K.V" Cc: Ritesh Harjani Cc: Gioh Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 25 +++++++++++++++++++++++++ mm/page_alloc.c | 41 ++++++++++++++--------------------------- mm/page_isolation.c | 41 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 29 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 829304090b90e8..a4f90ba7068ef0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); /* * in mm/page_alloc.c */ + +/* + * Locate the struct page for both the matching buddy in our + * pair (buddy1) and the combined O(n+1) page they form (page). + * + * 1) Any buddy B1 will have an order O twin B2 which satisfies + * the following equation: + * B2 = B1 ^ (1 << O) + * For example, if the starting buddy (buddy2) is #8 its order + * 1 buddy is #10: + * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 + * + * 2) Any buddy B will have an order O+1 parent P which + * satisfies the following equation: + * P = B & ~(1 << O) + * + * Assumption: *_mem_map is contiguous at least up to MAX_ORDER + */ +static inline unsigned long +__find_buddy_index(unsigned long page_idx, unsigned int order) +{ + return page_idx ^ (1 << order); +} + +extern int __isolate_free_page(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned long order); #ifdef CONFIG_MEMORY_FAILURE diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9f689f16b5aaac..fd11b913779e93 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -466,29 +466,6 @@ static inline void rmv_page_order(struct page *page) set_page_private(page, 0); } -/* - * Locate the struct page for both the matching buddy in our - * pair (buddy1) and the combined O(n+1) page they form (page). - * - * 1) Any buddy B1 will have an order O twin B2 which satisfies - * the following equation: - * B2 = B1 ^ (1 << O) - * For example, if the starting buddy (buddy2) is #8 its order - * 1 buddy is #10: - * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 - * - * 2) Any buddy B will have an order O+1 parent P which - * satisfies the following equation: - * P = B & ~(1 << O) - * - * Assumption: *_mem_map is contiguous at least up to MAX_ORDER - */ -static inline unsigned long -__find_buddy_index(unsigned long page_idx, unsigned int order) -{ - return page_idx ^ (1 << order); -} - /* * This function checks whether a page is free && is the buddy * we can do coalesce a page and its buddy if @@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page, unsigned long combined_idx; unsigned long uninitialized_var(buddy_idx); struct page *buddy; + int max_order = MAX_ORDER; VM_BUG_ON(!zone_is_initialized(zone)); @@ -577,15 +555,24 @@ static inline void __free_one_page(struct page *page, return; VM_BUG_ON(migratetype == -1); - if (!is_migrate_isolate(migratetype)) + if (is_migrate_isolate(migratetype)) { + /* + * We restrict max order of merging to prevent merge + * between freepages on isolate pageblock and normal + * pageblock. Without this, pageblock isolation + * could cause incorrect freepage accounting. + */ + max_order = min(MAX_ORDER, pageblock_order + 1); + } else { __mod_zone_freepage_state(zone, 1 << order, migratetype); + } - page_idx = pfn & ((1 << MAX_ORDER) - 1); + page_idx = pfn & ((1 << max_order) - 1); VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); - while (order < MAX_ORDER-1) { + while (order < max_order - 1) { buddy_idx = __find_buddy_index(page_idx, order); buddy = page + (buddy_idx - page_idx); if (!page_is_buddy(page, buddy, order)) @@ -1486,7 +1473,7 @@ void split_page(struct page *page, unsigned int order) } EXPORT_SYMBOL_GPL(split_page); -static int __isolate_free_page(struct page *page, unsigned int order) +int __isolate_free_page(struct page *page, unsigned int order) { unsigned long watermark; struct zone *zone; diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 1fa4a4d72ecc47..c8778f7e208e8a 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -76,17 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) { struct zone *zone; unsigned long flags, nr_pages; + struct page *isolated_page = NULL; + unsigned int order; + unsigned long page_idx, buddy_idx; + struct page *buddy; zone = page_zone(page); spin_lock_irqsave(&zone->lock, flags); if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) goto out; - nr_pages = move_freepages_block(zone, page, migratetype); - __mod_zone_freepage_state(zone, nr_pages, migratetype); + + /* + * Because freepage with more than pageblock_order on isolated + * pageblock is restricted to merge due to freepage counting problem, + * it is possible that there is free buddy page. + * move_freepages_block() doesn't care of merge so we need other + * approach in order to merge them. Isolation and free will make + * these pages to be merged. + */ + if (PageBuddy(page)) { + order = page_order(page); + if (order >= pageblock_order) { + page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); + buddy_idx = __find_buddy_index(page_idx, order); + buddy = page + (buddy_idx - page_idx); + + if (!is_migrate_isolate_page(buddy)) { + __isolate_free_page(page, order); + set_page_refcounted(page); + isolated_page = page; + } + } + } + + /* + * If we isolate freepage with more than pageblock_order, there + * should be no freepage in the range, so we could avoid costly + * pageblock scanning for freepage moving. + */ + if (!isolated_page) { + nr_pages = move_freepages_block(zone, page, migratetype); + __mod_zone_freepage_state(zone, nr_pages, migratetype); + } set_pageblock_migratetype(page, migratetype); zone->nr_isolate_pageblock--; out: spin_unlock_irqrestore(&zone->lock, flags); + if (isolated_page) + __free_pages(isolated_page, order); } static inline struct page * From 95069ac8da4975120ba76e968fc72948582c3509 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:25 -0800 Subject: [PATCH 125/138] mm/slab: fix unalignment problem on Malta with EVA due to slab merge Unlike SLUB, sometimes, object isn't started at the beginning of the slab in SLAB. This causes the unalignment problem after slab merging is supported by commit 12220dea07f1 ("mm/slab: support slab merge"). Following is the report from Markos that fail to boot on Malta with EVA. Calibrating delay loop... 19.86 BogoMIPS (lpj=99328) pid_max: default: 32768 minimum: 301 Mount-cache hash table entries: 4096 (order: 0, 16384 bytes) Mountpoint-cache hash table entries: 4096 (order: 0, 16384 bytes) Kernel bug detected[#1]: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.17.0-05639-g12220dea07f1 #1631 task: 1f04f5d8 ti: 1f050000 task.ti: 1f050000 epc : 80141190 alloc_unbound_pwq+0x234/0x304 Not tainted ra : 80141184 alloc_unbound_pwq+0x228/0x304 Process swapper/0 (pid: 1, threadinfo=1f050000, task=1f04f5d8, tls=00000000) Call Trace: alloc_unbound_pwq+0x234/0x304 apply_workqueue_attrs+0x11c/0x294 __alloc_workqueue_key+0x23c/0x470 init_workqueues+0x320/0x400 do_one_initcall+0xe8/0x23c kernel_init_freeable+0x9c/0x224 kernel_init+0x10/0x100 ret_from_kernel_thread+0x14/0x1c [ end trace cb88537fdc8fa200 ] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b alloc_unbound_pwq() allocates slab object from pool_workqueue. This kmem_cache requires 256 bytes alignment, but, current merging code doesn't honor that, and merge it with kmalloc-256. kmalloc-256 requires only cacheline size alignment so that above failure occurs. However, in x86, kmalloc-256 is luckily aligned in 256 bytes, so the problem didn't happen on it. To fix this problem, this patch introduces alignment mismatch check in find_mergeable(). This will fix the problem. Signed-off-by: Joonsoo Kim Reported-by: Markos Chandras Tested-by: Markos Chandras Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab_common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slab_common.c b/mm/slab_common.c index 406944207b61db..dcdab81bd240ba 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -259,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, if (s->size - size >= sizeof(void *)) continue; + if (IS_ENABLED(CONFIG_SLAB) && align && + (align > s->align || s->align % align)) + continue; + return s; } return NULL; From dae803e165a11bc88ca8dbc07a11077caf97bbcb Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 13 Nov 2014 15:19:27 -0800 Subject: [PATCH 126/138] mm: alloc_contig_range: demote pages busy message from warn to info Having test_pages_isolated failure message as a warning confuses users into thinking that it is more serious than it really is. In reality, if called via CMA, allocation will be retried so a single test_pages_isolated failure does not prevent allocation from succeeding. Demote the warning message to an info message and reformat it such that the text "failed" does not appear and instead a less worrying "PFNS busy" is used. This message is trivially reproducible on a 10GB x86 machine on 3.16.y kernels configured with CONFIG_DMA_CMA. Signed-off-by: Michal Nazarewicz Cc: Laurent Pinchart Cc: Peter Hurley Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fd11b913779e93..181dc593962b69 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6397,13 +6397,12 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, false)) { - pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", - outer_start, end); + pr_info("%s: [%lx, %lx) PFNs busy\n", + __func__, outer_start, end); ret = -EBUSY; goto done; } - /* Grab isolated pages from freelists. */ outer_end = isolate_freepages_range(&cc, outer_start, end); if (!outer_end) { From 1d5bfe1ffb5b9014ea80bcd8a40ae43a3e213120 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 13 Nov 2014 15:19:30 -0800 Subject: [PATCH 127/138] mm, compaction: prevent infinite loop in compact_zone Several people have reported occasionally seeing processes stuck in compact_zone(), even triggering soft lockups, in 3.18-rc2+. Testing a revert of commit e14c720efdd7 ("mm, compaction: remember position within pageblock in free pages scanner") fixed the issue, although the stuck processes do not appear to involve the free scanner. Finally, by code inspection, the bug was found in isolate_migratepages() which uses a slightly different condition to detect if the migration and free scanners have met, than compact_finished(). That has not been a problem until commit e14c720efdd7 allowed the free scanner position between individual invocations to be in the middle of a pageblock. In a relatively rare case, the migration scanner position can end up at the beginning of a pageblock, with the free scanner position in the middle of the same pageblock. If it's the migration scanner's turn, isolate_migratepages() exits immediately (without updating the position), while compact_finished() decides to continue compaction, resulting in a potentially infinite loop. The system can recover only if another process creates enough high-order pages to make the watermark checks in compact_finished() pass. This patch fixes the immediate problem by bumping the migration scanner's position to meet the free scanner in isolate_migratepages(), when both are within the same pageblock. This causes compact_finished() to terminate properly. A more robust check in compact_finished() is planned as a cleanup for better future maintainability. Fixes: e14c720efdd73 ("mm, compaction: remember position within pageblock in free pages scanner) Signed-off-by: Vlastimil Babka Reported-by: P. Christeas Tested-by: P. Christeas Link: http://marc.info/?l=linux-mm&m=141508604232522&w=2 Reported-by: Norbert Preining Tested-by: Norbert Preining Link: https://lkml.org/lkml/2014/11/4/904 Reported-by: Pavel Machek Link: https://lkml.org/lkml/2014/11/7/164 Cc: Joonsoo Kim Cc: David Rientjes Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 4f0151cfd2387e..f9792ba3537ccc 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1039,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, } acct_isolated(zone, cc); - /* Record where migration scanner will be restarted */ - cc->migrate_pfn = low_pfn; + /* + * Record where migration scanner will be restarted. If we end up in + * the same pageblock as the free scanner, make the scanners fully + * meet so that compact_finished() terminates compaction. + */ + cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn; return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE; } From 8edc6e1688fc8f02c8c1f53a2ec4928cb1055f4d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 Nov 2014 15:19:33 -0800 Subject: [PATCH 128/138] fanotify: fix notification of groups with inode & mount marks fsnotify() needs to merge inode and mount marks lists when notifying groups about events so that ignore masks from inode marks are reflected in mount mark notifications and groups are notified in proper order (according to priorities). Currently the sorting of the lists done by fsnotify_add_inode_mark() / fsnotify_add_vfsmount_mark() and fsnotify() differed which resulted ignore masks not being used in some cases. Fix the problem by always using the same comparison function when sorting / merging the mark lists. Thanks to Heinrich Schuchardt for improvements of my patch. Link: https://bugzilla.kernel.org/show_bug.cgi?id=87721 Signed-off-by: Jan Kara Reported-by: Heinrich Schuchardt Tested-by: Heinrich Schuchardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/fsnotify.c | 36 +++++++++++++++++++++--------------- fs/notify/fsnotify.h | 4 ++++ fs/notify/inode_mark.c | 8 +++----- fs/notify/mark.c | 36 ++++++++++++++++++++++++++++++++++++ fs/notify/vfsmount_mark.c | 8 +++----- 5 files changed, 67 insertions(+), 25 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9d3e9c50066aaf..89326acd45615e 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, &fsnotify_mark_srcu); } + /* + * We need to merge inode & vfsmount mark lists so that inode mark + * ignore masks are properly reflected for mount mark notifications. + * That's why this traversal is so complicated... + */ while (inode_node || vfsmount_node) { - inode_group = vfsmount_group = NULL; + inode_group = NULL; + inode_mark = NULL; + vfsmount_group = NULL; + vfsmount_mark = NULL; if (inode_node) { inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), @@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, vfsmount_group = vfsmount_mark->group; } - if (inode_group > vfsmount_group) { - /* handle inode */ - ret = send_to_group(to_tell, inode_mark, NULL, mask, - data, data_is, cookie, file_name); - /* we didn't use the vfsmount_mark */ - vfsmount_group = NULL; - } else if (vfsmount_group > inode_group) { - ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, - data, data_is, cookie, file_name); - inode_group = NULL; - } else { - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, - mask, data, data_is, cookie, - file_name); + if (inode_group && vfsmount_group) { + int cmp = fsnotify_compare_groups(inode_group, + vfsmount_group); + if (cmp > 0) { + inode_group = NULL; + inode_mark = NULL; + } else if (cmp < 0) { + vfsmount_group = NULL; + vfsmount_mark = NULL; + } } + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, + data, data_is, cookie, file_name); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 9c0898c4cfe1ce..3b68b0ae0a97cb 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; +/* compare two groups for sorting of marks lists */ +extern int fsnotify_compare_groups(struct fsnotify_group *a, + struct fsnotify_group *b); + extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, __u32 mask); /* add a mark to an inode */ diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index e8497144b32342..dfbf5447eea4ce 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, { struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d90deaa08e78f6..34c38fabf514f1 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -209,6 +209,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas mark->ignored_mask = mask; } +/* + * Sorting function for lists of fsnotify marks. + * + * Fanotify supports different notification classes (reflected as priority of + * notification group). Events shall be passed to notification groups in + * decreasing priority order. To achieve this marks in notification lists for + * inodes and vfsmounts are sorted so that priorities of corresponding groups + * are descending. + * + * Furthermore correct handling of the ignore mask requires processing inode + * and vfsmount marks of each group together. Using the group address as + * further sort criterion provides a unique sorting order and thus we can + * merge inode and vfsmount lists of marks in linear time and find groups + * present in both lists. + * + * A return value of 1 signifies that b has priority over a. + * A return value of 0 signifies that the two marks have to be handled together. + * A return value of -1 signifies that a has priority over b. + */ +int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) +{ + if (a == b) + return 0; + if (!a) + return 1; + if (!b) + return -1; + if (a->priority < b->priority) + return 1; + if (a->priority > b->priority) + return -1; + if (a < b) + return 1; + return -1; +} + /* * Attach an initialized mark to a given group and fs object. * These marks may be used for the fsnotify backend to determine which diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index ac851e8376b193..faefa72a11ebaa 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, struct mount *m = real_mount(mnt); struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); From 57cbc87e03c2f473d8f0579186c078ee06f48f2c Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:36 -0800 Subject: [PATCH 129/138] mm/debug-pagealloc: correct freepage accounting and order resetting One thing I did in this patch is fixing freepage accounting. If we clear guard page and link it onto isolate buddy list, we should not increase freepage count. This patch adds conditional branch to skip counting in this case. Without this patch, this overcounting happens frequently if guard order is set and CMA is used. Another thing fixed in this patch is the target to reset order. In __free_one_page(), we check the buddy page whether it is a guard page or not. And, if so, we should clear guard attribute on the buddy page and reset order of it to 0. But, current code resets original page's order rather than buddy one's. Maybe, this doesn't have any problem, because whole merged page's order will be re-assigned soon. But, it is better to correct code. Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Gioh Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 181dc593962b69..616a2c956b4b2a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -583,9 +583,11 @@ static inline void __free_one_page(struct page *page, */ if (page_is_guard(buddy)) { clear_page_guard_flag(buddy); - set_page_private(page, 0); - __mod_zone_freepage_state(zone, 1 << order, - migratetype); + set_page_private(buddy, 0); + if (!is_migrate_isolate(migratetype)) { + __mod_zone_freepage_state(zone, 1 << order, + migratetype); + } } else { list_del(&buddy->lru); zone->free_area[order].nr_free--; From f784a3f19613901ca4539a5b0eed3bdc700e6ee7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 13 Nov 2014 15:19:39 -0800 Subject: [PATCH 130/138] mem-hotplug: reset node managed pages when hot-adding a new pgdat In free_area_init_core(), zone->managed_pages is set to an approximate value for lowmem, and will be adjusted when the bootmem allocator frees pages into the buddy system. But free_area_init_core() is also called by hotadd_new_pgdat() when hot-adding memory. As a result, zone->managed_pages of the newly added node's pgdat is set to an approximate value in the very beginning. Even if the memory on that node has node been onlined, /sys/device/system/node/nodeXXX/meminfo has wrong value: hot-add node2 (memory not onlined) cat /sys/device/system/node/node2/meminfo Node 2 MemTotal: 33554432 kB Node 2 MemFree: 0 kB Node 2 MemUsed: 33554432 kB Node 2 Active: 0 kB This patch fixes this problem by reset node managed pages to 0 after hot-adding a new node. 1. Move reset_managed_pages_done from reset_node_managed_pages() to reset_all_zones_managed_pages() 2. Make reset_node_managed_pages() non-static 3. Call reset_node_managed_pages() in hotadd_new_pgdat() after pgdat is initialized Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu Cc: [3.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 1 + mm/bootmem.c | 9 +++++---- mm/memory_hotplug.c | 9 +++++++++ mm/nobootmem.c | 8 +++++--- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 4e2bd4c95b66ff..0995c2de8162c2 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); extern unsigned long free_all_bootmem(void); +extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); extern void free_bootmem_node(pg_data_t *pgdat, diff --git a/mm/bootmem.c b/mm/bootmem.c index 8a000cebb0d742..477be696511d66 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) static int reset_managed_pages_done __initdata; -static inline void __init reset_node_managed_pages(pg_data_t *pgdat) +void reset_node_managed_pages(pg_data_t *pgdat) { struct zone *z; - if (reset_managed_pages_done) - return; - for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) z->managed_pages = 0; } @@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void) { struct pglist_data *pgdat; + if (reset_managed_pages_done) + return; + for_each_online_pgdat(pgdat) reset_node_managed_pages(pgdat); + reset_managed_pages_done = 1; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 252e1dbbed86e9..c5f7fe199a60f1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -1096,6 +1097,14 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) build_all_zonelists(pgdat, NULL); mutex_unlock(&zonelists_mutex); + /* + * zone->managed_pages is set to an approximate value in + * free_area_init_core(), which will cause + * /sys/device/system/node/nodeX/meminfo has wrong data. + * So reset it to 0 before any memory is onlined. + */ + reset_node_managed_pages(pgdat); + return pgdat; } diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 7c7ab32ee5032d..90b50468333e38 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void) static int reset_managed_pages_done __initdata; -static inline void __init reset_node_managed_pages(pg_data_t *pgdat) +void reset_node_managed_pages(pg_data_t *pgdat) { struct zone *z; - if (reset_managed_pages_done) - return; for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) z->managed_pages = 0; } @@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void) { struct pglist_data *pgdat; + if (reset_managed_pages_done) + return; + for_each_online_pgdat(pgdat) reset_node_managed_pages(pgdat); + reset_managed_pages_done = 1; } From 0bd854200873894a76f32603ff2c4c988ad6b5b5 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 13 Nov 2014 15:19:41 -0800 Subject: [PATCH 131/138] mem-hotplug: reset node present pages when hot-adding a new pgdat When memory is hot-added, all the memory is in offline state. So clear all zones' present_pages because they will be updated in online_pages() and offline_pages(). Otherwise, /proc/zoneinfo will corrupt: When the memory of node2 is offline: # cat /proc/zoneinfo ...... Node 2, zone Movable ...... spanned 8388608 present 8388608 managed 0 When we online memory on node2: # cat /proc/zoneinfo ...... Node 2, zone Movable ...... spanned 8388608 present 16777216 managed 8388608 Signed-off-by: Tang Chen Reviewed-by: Yasuaki Ishimatsu Cc: [3.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c5f7fe199a60f1..1bf4807cb21e49 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1067,6 +1067,16 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ } #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ +static void reset_node_present_pages(pg_data_t *pgdat) +{ + struct zone *z; + + for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) + z->present_pages = 0; + + pgdat->node_present_pages = 0; +} + /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) { @@ -1105,6 +1115,13 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) */ reset_node_managed_pages(pgdat); + /* + * When memory is hot-added, all the memory is in offline state. So + * clear all zones' present_pages because they will be updated in + * online_pages() and offline_pages(). + */ + reset_node_present_pages(pgdat); + return pgdat; } From bc53a3f46de8f3b2e28d46106216f3a759be8705 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Thu, 13 Nov 2014 15:19:44 -0800 Subject: [PATCH 132/138] kernel/panic.c: update comments for print_tainted Commit 69361eef9056 ("panic: add TAINT_SOFTLOCKUP") added the 'L' flag, but failed to update the comments for print_tainted(). So, update the comments. Signed-off-by: Xie XiuQi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/panic.c b/kernel/panic.c index d09dc5c32c6740..cf80672b79246d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -244,6 +244,7 @@ static const struct tnt tnts[] = { * 'I' - Working around severe firmware bug. * 'O' - Out-of-tree module has been loaded. * 'E' - Unsigned module has been loaded. + * 'L' - A soft lockup has previously occurred. * * The string is overwritten by the next call to print_tainted(). */ From 8fe671fc0b9f5a0d013153a9ddbd979d6eff0547 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Thu, 13 Nov 2014 15:19:47 -0800 Subject: [PATCH 133/138] MAINTAINERS: add IIO include files Files under include/linux/iio were not reported as part of the IIO subsystem. Signed-off-by: Daniel Baluta Reported-by: Cristina Ciocan Reviewed-by: Jingoo Han Cc: Hartmut Knaack Cc: Lars-Peter Clausen Cc: Peter Meerwald Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea4d0058fd1b68..60b1163dba28a4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4716,6 +4716,7 @@ L: linux-iio@vger.kernel.org S: Maintained F: drivers/iio/ F: drivers/staging/iio/ +F: include/linux/iio/ IKANOS/ADI EAGLE ADSL USB DRIVER M: Matthieu Castet From a7ef82aee91f26da79b981b9f5bca43b8817d3e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 8 Nov 2014 23:36:09 -0800 Subject: [PATCH 134/138] Input: alps - ignore bad data on Dell Latitudes E6440 and E7440 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes on Dell Latitude laptops psmouse/alps driver receive invalid ALPS protocol V3 packets with bit7 set in last byte. More often it can be reproduced on Dell Latitude E6440 or E7440 with closed lid and pushing cover above touchpad. If bit7 in last packet byte is set then it is not valid ALPS packet. I was told that ALPS devices never send these packets. It is not know yet who send those packets, it could be Dell EC, bug in BIOS and also bug in touchpad firmware... With this patch alps driver does not process those invalid packets, but instead of reporting PSMOUSE_BAD_DATA, getting into out of sync state, getting back in sync with the next byte and spam dmesg we return PSMOUSE_FULL_PACKET. If driver is truly out of sync we'll fail the checks on the next byte and report PSMOUSE_BAD_DATA then. Signed-off-by: Pali Rohár Tested-by: Pali Rohár Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 433638e7e4af56..d125a019383f10 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1186,12 +1186,27 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) } /* Bytes 2 - pktsize should have 0 in the highest bit */ - if ((priv->proto_version < ALPS_PROTO_V5) && + if (priv->proto_version < ALPS_PROTO_V5 && psmouse->pktcnt >= 2 && psmouse->pktcnt <= psmouse->pktsize && (psmouse->packet[psmouse->pktcnt - 1] & 0x80)) { psmouse_dbg(psmouse, "refusing packet[%i] = %x\n", psmouse->pktcnt - 1, psmouse->packet[psmouse->pktcnt - 1]); + + if (priv->proto_version == ALPS_PROTO_V3 && + psmouse->pktcnt == psmouse->pktsize) { + /* + * Some Dell boxes, such as Latitude E6440 or E7440 + * with closed lid, quite often smash last byte of + * otherwise valid packet with 0xff. Given that the + * next packet is very likely to be valid let's + * report PSMOUSE_FULL_PACKET but not process data, + * rather than reporting PSMOUSE_BAD_DATA and + * filling the logs. + */ + return PSMOUSE_FULL_PACKET; + } + return PSMOUSE_BAD_DATA; } From f386474e12a560e005ec7899e78f51f6bdc3cf41 Mon Sep 17 00:00:00 2001 From: Ulrik De Bie Date: Thu, 13 Nov 2014 17:45:12 -0800 Subject: [PATCH 135/138] Input: elantech - report the middle button of the touchpad In the past, no elantech was known with 3 touchpad mouse buttons. Fujitsu H730 is the first known elantech with a middle button. This commit enables this middle button. For backwards compatibility, the Fujitsu is detected via DMI, and only for this one 3 buttons will be announced. Reported-by: Stefan Valouch Signed-off-by: Ulrik De Bie Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index ce699eba9adc09..6d628f155c0839 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -563,6 +563,7 @@ static void elantech_input_sync_v4(struct psmouse *psmouse) } else { input_report_key(dev, BTN_LEFT, packet[0] & 0x01); input_report_key(dev, BTN_RIGHT, packet[0] & 0x02); + input_report_key(dev, BTN_MIDDLE, packet[0] & 0x04); } input_mt_report_pointer_emulation(dev, true); @@ -1131,6 +1132,22 @@ static void elantech_set_buttonpad_prop(struct psmouse *psmouse) } } +/* + * Some hw_version 4 models do have a middle button + */ +static const struct dmi_system_id elantech_dmi_has_middle_button[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Fujitsu H730 has a middle button */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H730"), + }, + }, +#endif + { } +}; + /* * Set the appropriate event bits for the input subsystem */ @@ -1150,6 +1167,8 @@ static int elantech_set_input_params(struct psmouse *psmouse) __clear_bit(EV_REL, dev->evbit); __set_bit(BTN_LEFT, dev->keybit); + if (dmi_check_system(elantech_dmi_has_middle_button)) + __set_bit(BTN_MIDDLE, dev->keybit); __set_bit(BTN_RIGHT, dev->keybit); __set_bit(BTN_TOUCH, dev->keybit); From 2d9eb81fdb9f08df3a4b1638c1270a4453b40ac2 Mon Sep 17 00:00:00 2001 From: Ulrik De Bie Date: Thu, 13 Nov 2014 17:47:04 -0800 Subject: [PATCH 136/138] Input: elantech - provide a sysfs knob for crc_enabled The detection of crc_enabled is known to fail for Fujitsu H730. A DMI blacklist is added for that, but it can be expected that other laptops will pop up with this. Here a sysfs knob is provided to alter the behaviour of crc_enabled. Writing 0 or 1 to it sets the variable to 0 or 1. Reading it will show the crc_enabled variable (0 or 1). Reported-by: Stefan Valouch Signed-off-by: Ulrik De Bie Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 6d628f155c0839..3fcb6b3cb0bdae 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1330,6 +1330,7 @@ ELANTECH_INT_ATTR(reg_25, 0x25); ELANTECH_INT_ATTR(reg_26, 0x26); ELANTECH_INT_ATTR(debug, 0); ELANTECH_INT_ATTR(paritycheck, 0); +ELANTECH_INT_ATTR(crc_enabled, 0); static struct attribute *elantech_attrs[] = { &psmouse_attr_reg_07.dattr.attr, @@ -1344,6 +1345,7 @@ static struct attribute *elantech_attrs[] = { &psmouse_attr_reg_26.dattr.attr, &psmouse_attr_debug.dattr.attr, &psmouse_attr_paritycheck.dattr.attr, + &psmouse_attr_crc_enabled.dattr.attr, NULL }; From c6c748ef85c342d2726fc3c91c6a2af313af2360 Mon Sep 17 00:00:00 2001 From: Ulrik De Bie Date: Thu, 13 Nov 2014 17:48:06 -0800 Subject: [PATCH 137/138] Input: elantech - update the documentation A chapter is added to describe the trackpoint packets. A section is added to describe the behaviour of the knob crc_enabled in sysfs. The introduction of the documentation only mentioned v1/v2, but in the last part it already contains explanation of v3 and v4. The introduction is updated. Signed-off-by: Ulrik De Bie Signed-off-by: Dmitry Torokhov --- Documentation/input/elantech.txt | 81 +++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt index e1ae127ed099d4..1ec0db7879d310 100644 --- a/Documentation/input/elantech.txt +++ b/Documentation/input/elantech.txt @@ -38,22 +38,38 @@ Contents 7.2.1 Status packet 7.2.2 Head packet 7.2.3 Motion packet + 8. Trackpoint (for Hardware version 3 and 4) + 8.1 Registers + 8.2 Native relative mode 6 byte packet format + 8.2.1 Status Packet 1. Introduction ~~~~~~~~~~~~ -Currently the Linux Elantech touchpad driver is aware of two different -hardware versions unimaginatively called version 1 and version 2. Version 1 -is found in "older" laptops and uses 4 bytes per packet. Version 2 seems to -be introduced with the EeePC and uses 6 bytes per packet, and provides -additional features such as position of two fingers, and width of the touch. +Currently the Linux Elantech touchpad driver is aware of four different +hardware versions unimaginatively called version 1,version 2, version 3 +and version 4. Version 1 is found in "older" laptops and uses 4 bytes per +packet. Version 2 seems to be introduced with the EeePC and uses 6 bytes +per packet, and provides additional features such as position of two fingers, +and width of the touch. Hardware version 3 uses 6 bytes per packet (and +for 2 fingers the concatenation of two 6 bytes packets) and allows tracking +of up to 3 fingers. Hardware version 4 uses 6 bytes per packet, and can +combine a status packet with multiple head or motion packets. Hardware version +4 allows tracking up to 5 fingers. + +Some Hardware version 3 and version 4 also have a trackpoint which uses a +separate packet format. It is also 6 bytes per packet. The driver tries to support both hardware versions and should be compatible with the Xorg Synaptics touchpad driver and its graphical configuration utilities. +Note that a mouse button is also associated with either the touchpad or the +trackpoint when a trackpoint is available. Disabling the Touchpad in xorg +(TouchPadOff=0) will also disable the buttons associated with the touchpad. + Additionally the operation of the touchpad can be altered by adjusting the contents of some of its internal registers. These registers are represented by the driver as sysfs entries under /sys/bus/serio/drivers/psmouse/serio? @@ -78,7 +94,7 @@ completeness sake. 2. Extra knobs ~~~~~~~~~~~ -Currently the Linux Elantech touchpad driver provides two extra knobs under +Currently the Linux Elantech touchpad driver provides three extra knobs under /sys/bus/serio/drivers/psmouse/serio? for the user. * debug @@ -112,6 +128,20 @@ Currently the Linux Elantech touchpad driver provides two extra knobs under data consistency checking can be done. For now checking is disabled by default. Currently even turning it on will do nothing. +* crc_enabled + + Sets crc_enabled to 0/1. The name "crc_enabled" is the official name of + this integrity check, even though it is not an actual cyclic redundancy + check. + + Depending on the state of crc_enabled, certain basic data integrity + verification is done by the driver on hardware version 3 and 4. The + driver will reject any packet that appears corrupted. Using this knob, + The state of crc_enabled can be altered with this knob. + + Reading the crc_enabled value will show the active value. Echoing + "0" or "1" to this file will set the state to "0" or "1". + ///////////////////////////////////////////////////////////////////////////// 3. Differentiating hardware versions @@ -746,3 +776,42 @@ byte 5: byte 0 ~ 2 for one finger byte 3 ~ 5 for another + + +8. Trackpoint (for Hardware version 3 and 4) + ========================================= +8.1 Registers + ~~~~~~~~~ +No special registers have been identified. + +8.2 Native relative mode 6 byte packet format + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +8.2.1 Status Packet + ~~~~~~~~~~~~~ + +byte 0: + bit 7 6 5 4 3 2 1 0 + 0 0 sx sy 0 M R L +byte 1: + bit 7 6 5 4 3 2 1 0 + ~sx 0 0 0 0 0 0 0 +byte 2: + bit 7 6 5 4 3 2 1 0 + ~sy 0 0 0 0 0 0 0 +byte 3: + bit 7 6 5 4 3 2 1 0 + 0 0 ~sy ~sx 0 1 1 0 +byte 4: + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 +byte 5: + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + + x and y are written in two's complement spread + over 9 bits with sx/sy the relative top bit and + x7..x0 and y7..y0 the lower bits. + ~sx is the inverse of sx, ~sy is the inverse of sy. + The sign of y is opposite to what the input driver + expects for a relative movement From eaca2d8e75e90a70a63a6695c9f61932609db212 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 11 Nov 2014 17:16:44 +0100 Subject: [PATCH 138/138] firewire: cdev: prevent kernel stack leaking into ioctl arguments Found by the UC-KLEE tool: A user could supply less input to firewire-cdev ioctls than write- or write/read-type ioctl handlers expect. The handlers used data from uninitialized kernel stack then. This could partially leak back to the user if the kernel subsequently generated fw_cdev_event_'s (to be read from the firewire-cdev fd) which notably would contain the _u64 closure field which many of the ioctl argument structures contain. The fact that the handlers would act on random garbage input is a lesser issue since all handlers must check their input anyway. The fix simply always null-initializes the entire ioctl argument buffer regardless of the actual length of expected user input. That is, a runtime overhead of memset(..., 40) is added to each firewirew-cdev ioctl() call. [Comment from Clemens Ladisch: This part of the stack is most likely to be already in the cache.] Remarks: - There was never any leak from kernel stack to the ioctl output buffer itself. IOW, it was not possible to read kernel stack by a read-type or write/read-type ioctl alone; the leak could at most happen in combination with read()ing subsequent event data. - The actual expected minimum user input of each ioctl from include/uapi/linux/firewire-cdev.h is, in bytes: [0x00] = 32, [0x05] = 4, [0x0a] = 16, [0x0f] = 20, [0x14] = 16, [0x01] = 36, [0x06] = 20, [0x0b] = 4, [0x10] = 20, [0x15] = 20, [0x02] = 20, [0x07] = 4, [0x0c] = 0, [0x11] = 0, [0x16] = 8, [0x03] = 4, [0x08] = 24, [0x0d] = 20, [0x12] = 36, [0x17] = 12, [0x04] = 20, [0x09] = 24, [0x0e] = 4, [0x13] = 40, [0x18] = 4. Reported-by: David Ramos Cc: Signed-off-by: Stefan Richter --- drivers/firewire/core-cdev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 5d997a33907e43..2a3973a7c44179 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1637,8 +1637,7 @@ static int dispatch_ioctl(struct client *client, _IOC_SIZE(cmd) > sizeof(buffer)) return -ENOTTY; - if (_IOC_DIR(cmd) == _IOC_READ) - memset(&buffer, 0, _IOC_SIZE(cmd)); + memset(&buffer, 0, sizeof(buffer)); if (_IOC_DIR(cmd) & _IOC_WRITE) if (copy_from_user(&buffer, arg, _IOC_SIZE(cmd)))