diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 index f7e32f218f737f..e82fc37be80225 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 @@ -43,7 +43,7 @@ Description: read only This sysfs interface exposes the number of cores per chip present in the system. -What: /sys/devices/hv_24x7/interface/cpumask +What: /sys/devices/hv_24x7/cpumask Date: July 2020 Contact: Linux on PowerPC Developer List Description: read only diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdc1f33fd3d10a..a1068742a6df11 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1233,8 +1233,7 @@ efi= [EFI] Format: { "debug", "disable_early_pci_dma", "nochunk", "noruntime", "nosoftreserve", - "novamap", "no_disable_early_pci_dma", - "old_map" } + "novamap", "no_disable_early_pci_dma" } debug: enable misc debug output. disable_early_pci_dma: disable the busmaster bit on all PCI bridges while in the EFI boot stub. @@ -1251,8 +1250,6 @@ novamap: do not call SetVirtualAddressMap(). no_disable_early_pci_dma: Leave the busmaster bit set on all PCI bridges while in the EFI boot stub - old_map [X86-64]: switch to the old ioremap-based EFI - runtime services mapping. [Needs CONFIG_X86_UV=y] efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml index 08678af5ed9364..8ce5ed8a58dd76 100644 --- a/Documentation/devicetree/bindings/net/renesas,ether.yaml +++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml @@ -59,9 +59,15 @@ properties: clocks: maxItems: 1 - pinctrl-0: true + power-domains: + maxItems: 1 + + resets: + maxItems: 1 - pinctrl-names: true + phy-mode: true + + phy-handle: true renesas,no-ether-link: type: boolean @@ -74,6 +80,11 @@ properties: specify when the Ether LINK signal is active-low instead of normal active-high +patternProperties: + "^ethernet-phy@[0-9a-f]$": + type: object + $ref: ethernet-phy.yaml# + required: - compatible - reg @@ -83,7 +94,8 @@ required: - '#address-cells' - '#size-cells' - clocks - - pinctrl-0 + +additionalProperties: false examples: # Lager board @@ -99,8 +111,6 @@ examples: clocks = <&mstp8_clks R8A7790_CLK_ETHER>; phy-mode = "rmii"; phy-handle = <&phy1>; - pinctrl-0 = <ðer_pins>; - pinctrl-names = "default"; renesas,ether-link-active-low; #address-cells = <1>; #size-cells = <0>; @@ -109,7 +119,5 @@ examples: reg = <1>; interrupt-parent = <&irqc0>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; - pinctrl-0 = <&phy1_pins>; - pinctrl-names = "default"; }; }; diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index fdddb822d564b0..e005b458102327 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -9,6 +9,11 @@ #ifndef __ASSEMBLY__ +/* + * Added to include __machine_check_early_realmode_* functions + */ +#include + /* This structure can grow, it's real size is used by head.S code * via the mkdefs mechanism. */ diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 925cf89cbf4bac..6bfc87915d5dba 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -52,7 +52,7 @@ enum fixed_addresses { FIX_HOLE, /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, - FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1, + FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index d635b96c7ea69b..7355ed05e65eda 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -15,11 +15,18 @@ #ifndef __ASSEMBLY__ #include +#include #define KASAN_SHADOW_SCALE_SHIFT 3 +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX) +#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) +#else +#define KASAN_KERN_START PAGE_OFFSET +#endif + #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ - (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT)) + (KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index adf2cda67f9a40..89aa8248a57ddd 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -210,6 +210,9 @@ struct mce_error_info { #define MCE_EVENT_RELEASE true #define MCE_EVENT_DONTRELEASE false +struct pt_regs; +struct notifier_block; + extern void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr, uint64_t phys_addr); @@ -225,5 +228,9 @@ int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +long __machine_check_early_realmode_p7(struct pt_regs *regs); +long __machine_check_early_realmode_p8(struct pt_regs *regs); +long __machine_check_early_realmode_p9(struct pt_regs *regs); +long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 1e8b2e1ec1db6a..daec64d41b4478 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -40,4 +40,7 @@ static inline bool is_sier_available(void) { return false; } /* To support perf_regs sier update */ extern bool is_sier_available(void); +/* To define perf extended regs mask value */ +extern u64 PERF_REG_EXTENDED_MASK; +#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK #endif diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 86c9eb064b22b4..f6acabb6c9be80 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -62,6 +62,11 @@ struct power_pmu { int *blacklist_ev; /* BHRB entries in the PMU */ int bhrb_nr; + /* + * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if + * the pmu supports extended perf regs capability + */ + int capabilities; }; /* diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index f599064dd8dc84..bdf5f10f8b9f50 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -48,6 +48,24 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, PERF_REG_POWERPC_MMCRA, - PERF_REG_POWERPC_MAX, + /* Extended registers */ + PERF_REG_POWERPC_MMCR0, + PERF_REG_POWERPC_MMCR1, + PERF_REG_POWERPC_MMCR2, + PERF_REG_POWERPC_MMCR3, + PERF_REG_POWERPC_SIER2, + PERF_REG_POWERPC_SIER3, + /* Max regs without the extended regs */ + PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; + +#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) + +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ +#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ +#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) + +#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) +#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 3d406a9626e86e..2aa89c6b289679 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -72,9 +72,6 @@ extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power10(void); -extern long __machine_check_early_realmode_p7(struct pt_regs *regs); -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -542,6 +539,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power10", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power10", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 6f8c0c6b937a17..8dc46f38680b25 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -64,10 +64,6 @@ struct dt_cpu_feature { * Set up the base CPU */ -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); -extern long __machine_check_early_realmode_p10(struct pt_regs *regs); - static int hv_mode; static struct { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b198b0ff25bcf2..808ec9fab6052f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -311,6 +311,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) min = pvr & 0xFF; break; case 0x004e: /* POWER9 bits 12-15 give chip type */ + case 0x0080: /* POWER10 bit 12 gives SMT8/4 */ maj = (pvr >> 8) & 0x0F; min = pvr & 0xFF; break; diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index c0162911f6cbdc..d426eaf76bb048 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -191,10 +191,17 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; +#ifdef MODULES_VADDR + if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) + return false; + if (addr > ALIGN(MODULES_END, SZ_256M) - 1) + return false; +#else if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) return false; - if (addr >= ALIGN(VMALLOC_END, SZ_256M)) + if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) return false; +#endif return true; } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1da9dbba92171b..890a71c5293eef 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1115,8 +1115,10 @@ void hash__early_init_mmu_secondary(void) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); - if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY)) +#ifdef CONFIG_PPC_MEM_KEYS + if (mmu_has_feature(MMU_FTR_PKEY)) mtspr(SPRN_UAMOR, default_uamor); +#endif } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 78fe3498659427..93d20e1ed84514 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2141,6 +2141,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); + } else if (period) { + /* Account for interrupt in case of invalid SIAR */ + if (perf_event_account_interrupt(event)) + power_pmu_stop(event, 0); } } @@ -2323,6 +2327,7 @@ int register_power_pmu(struct power_pmu *pmu) pmu->name); power_pmu.attr_groups = ppmu->attr_groups; + power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS); #ifdef MSR_HV /* diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index cdb7bfbd157e0e..6e7e820508df7c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1128,6 +1128,15 @@ static struct bin_attribute *if_bin_attrs[] = { NULL, }; +static struct attribute *cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cpumask_attrs, +}; + static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, @@ -1135,7 +1144,6 @@ static struct attribute *if_attrs[] = { &dev_attr_sockets.attr, &dev_attr_chipspersocket.attr, &dev_attr_coresperchip.attr, - &dev_attr_cpumask.attr, NULL, }; @@ -1151,6 +1159,7 @@ static const struct attribute_group *attr_groups[] = { &event_desc_group, &event_long_desc_group, &if_group, + &cpumask_attr_group, NULL, }; diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index a213a0aa5d25cf..8e53f2fc3fe07f 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -13,9 +13,11 @@ #include #include +u64 PERF_REG_EXTENDED_MASK; + #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) -#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1)) +#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK)) static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]), @@ -69,10 +71,36 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr), }; +/* Function to return the extended register values */ +static u64 get_ext_regs_value(int idx) +{ + switch (idx) { + case PERF_REG_POWERPC_MMCR0: + return mfspr(SPRN_MMCR0); + case PERF_REG_POWERPC_MMCR1: + return mfspr(SPRN_MMCR1); + case PERF_REG_POWERPC_MMCR2: + return mfspr(SPRN_MMCR2); +#ifdef CONFIG_PPC64 + case PERF_REG_POWERPC_MMCR3: + return mfspr(SPRN_MMCR3); + case PERF_REG_POWERPC_SIER2: + return mfspr(SPRN_SIER2); + case PERF_REG_POWERPC_SIER3: + return mfspr(SPRN_SIER3); +#endif + default: return 0; + } +} + u64 perf_reg_value(struct pt_regs *regs, int idx) { - if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX)) - return 0; + u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + perf_reg_extended_max = PERF_REG_MAX_ISA_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) + perf_reg_extended_max = PERF_REG_MAX_ISA_300; if (idx == PERF_REG_POWERPC_SIER && (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || @@ -85,6 +113,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) IS_ENABLED(CONFIG_PPC32))) return 0; + if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max) + return get_ext_regs_value(idx); + + /* + * If the idx is referring to value beyond the + * supported registers, return 0 with a warning + */ + if (WARN_ON_ONCE(idx >= perf_reg_extended_max)) + return 0; + return regs_get_register(regs, pt_regs_offset[idx]); } diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index f7cff7f36a1cd4..83148656b5241e 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -87,6 +87,8 @@ #define POWER10_MMCRA_IFM3 0x00000000C0000000UL #define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL +extern u64 PERF_REG_EXTENDED_MASK; + /* Table of alternatives, sorted by column 0 */ static const unsigned int power10_event_alternatives[][MAX_ALT] = { { PM_RUN_CYC_ALT, PM_RUN_CYC }, @@ -397,6 +399,7 @@ static struct power_pmu power10_pmu = { .cache_events = &power10_cache_events, .attr_groups = power10_pmu_attr_groups, .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, }; int init_power10_pmu(void) @@ -408,6 +411,9 @@ int init_power10_pmu(void) strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) return -ENODEV; + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31; + rc = register_power_pmu(&power10_pmu); if (rc) return rc; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 05dae38b969a5f..2a57e93a79dcf7 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -90,6 +90,8 @@ enum { #define POWER9_MMCRA_IFM3 0x00000000C0000000UL #define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL +extern u64 PERF_REG_EXTENDED_MASK; + /* Nasty Power9 specific hack */ #define PVR_POWER9_CUMULUS 0x00002000 @@ -434,6 +436,7 @@ static struct power_pmu power9_pmu = { .cache_events = &power9_cache_events, .attr_groups = power9_pmu_attr_groups, .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, }; int init_power9_pmu(void) @@ -457,6 +460,9 @@ int init_power9_pmu(void) } } + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300; + rc = register_power_pmu(&power9_pmu); if (rc) return rc; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c9c25fb0783c4c..023a4f987bb22d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2705,7 +2705,7 @@ void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) struct iommu_table *tbl = pe->table_group.tables[0]; int64_t rc; - if (pe->dma_setup_done) + if (!pe->dma_setup_done) return; rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index c6e0d8abf75ea9..7a974ed6b2400e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -107,22 +107,28 @@ static int pseries_cpu_disable(void) */ static void pseries_cpu_die(unsigned int cpu) { - int tries; int cpu_status = 1; unsigned int pcpu = get_hard_smp_processor_id(cpu); + unsigned long timeout = jiffies + msecs_to_jiffies(120000); - for (tries = 0; tries < 25; tries++) { + while (true) { cpu_status = smp_query_cpu_stopped(pcpu); if (cpu_status == QCSS_STOPPED || cpu_status == QCSS_HARDWARE_ERROR) break; - cpu_relax(); + if (time_after(jiffies, timeout)) { + pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n", + cpu, pcpu); + timeout = jiffies + msecs_to_jiffies(120000); + } + + cond_resched(); } - if (cpu_status != 0) { - printk("Querying DEAD? cpu %i (%i) shows %i\n", - cpu, pcpu, cpu_status); + if (cpu_status == QCSS_HARDWARE_ERROR) { + pr_warn("CPU %i (hwid %i) reported error while dying\n", + cpu, pcpu); } /* Isolation and deallocation are definitely done by diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f3736fcd98fcb8..13c86a292c6d70 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier) case EPOW_SHUTDOWN_ON_UPS: pr_emerg("Loss of system power detected. System is running on" " UPS/battery. Check RTAS error log for details\n"); - orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 98e4d8886f11c5..ae9b0d4615b325 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel is built with * Fetch the per-CPU GSBASE value for this processor and put it in @reg. * We normally use %gs for accessing per-CPU data, but we are setting up * %gs here and obviously can not use %gs itself to access per-CPU data. + * + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and + * may not restore the host's value until the CPU returns to userspace. + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives + * while running KVM's run loop. */ .macro GET_PERCPU_BASE reg:req - ALTERNATIVE \ - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \ - "RDPID \reg", \ - X86_FEATURE_RDPID + LOAD_CPU_AND_NODE_SEG_LIMIT \reg andq $VDSO_CPUNODE_MASK, \reg movq __per_cpu_offset(, \reg, 8), \reg .endm diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index cb94ba86efd28f..6a4ca27b2c9e12 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -390,6 +390,18 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(gt_requests, "event=0x03"), + INTEL_UNCORE_EVENT_DESC(gt_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(gt_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ia_requests, "event=0x04"), + INTEL_UNCORE_EVENT_DESC(ia_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ia_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(io_requests, "event=0x05"), + INTEL_UNCORE_EVENT_DESC(io_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(io_requests.unit, "MiB"), + { /* end: all zeroes */ }, }; @@ -405,13 +417,35 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 #define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE +/* BW break down- legacy counters */ +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS 0x3 +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE 0x5040 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS 0x4 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE 0x5044 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS 0x5 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE 0x5048 + enum perf_snb_uncore_imc_freerunning_types { - SNB_PCI_UNCORE_IMC_DATA = 0, + SNB_PCI_UNCORE_IMC_DATA_READS = 0, + SNB_PCI_UNCORE_IMC_DATA_WRITES, + SNB_PCI_UNCORE_IMC_GT_REQUESTS, + SNB_PCI_UNCORE_IMC_IA_REQUESTS, + SNB_PCI_UNCORE_IMC_IO_REQUESTS, + SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, }; static struct freerunning_counters snb_uncore_imc_freerunning[] = { - [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IA_REQUESTS] = { SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IO_REQUESTS] = { SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, }; static struct attribute *snb_uncore_imc_formats_attr[] = { @@ -525,6 +559,18 @@ static int snb_uncore_imc_event_init(struct perf_event *event) base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; idx = UNCORE_PMC_IDX_FREERUNNING; break; + case SNB_UNCORE_PCI_IMC_GT_REQUESTS: + base = SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IA_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IO_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; default: return -EINVAL; } @@ -598,7 +644,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { static struct intel_uncore_type snb_uncore_imc = { .name = "imc", - .num_counters = 2, + .num_counters = 5, .num_boxes = 1, .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, .mmio_map_size = SNB_UNCORE_PCI_IMC_MAP_SIZE, diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b9c2667ac46cc6..bc9758ef292ef1 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -81,11 +81,8 @@ extern unsigned long efi_fw_vendor, efi_config_table; kernel_fpu_end(); \ }) - #define arch_efi_call_virt(p, f, args...) p->f(args) -#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) - #else /* !CONFIG_X86_32 */ #define EFI_LOADER_SIGNATURE "EL64" @@ -125,9 +122,6 @@ struct efi_scratch { kernel_fpu_end(); \ }) -extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, - u32 type, u64 attribute); - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present @@ -143,17 +137,13 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; -extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); extern void __init efi_print_memmap(void); -extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init old_map_region(efi_memory_desc_t *md); -extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f6ea8f1a9d57ae..d37ebe6e70d7a1 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -496,74 +495,6 @@ void __init efi_init(void) efi_print_memmap(); } -#if defined(CONFIG_X86_32) - -void __init efi_set_executable(efi_memory_desc_t *md, bool executable) -{ - u64 addr, npages; - - addr = md->virt_addr; - npages = md->num_pages; - - memrange_efi_to_native(&addr, &npages); - - if (executable) - set_memory_x(addr, npages); - else - set_memory_nx(addr, npages); -} - -void __init runtime_code_page_mkexec(void) -{ - efi_memory_desc_t *md; - - /* Make EFI runtime service code area executable */ - for_each_efi_memory_desc(md) { - if (md->type != EFI_RUNTIME_SERVICES_CODE) - continue; - - efi_set_executable(md, true); - } -} - -void __init efi_memory_uc(u64 addr, unsigned long size) -{ - unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; - u64 npages; - - npages = round_up(size, page_shift) / page_shift; - memrange_efi_to_native(&addr, &npages); - set_memory_uc(addr, npages); -} - -void __init old_map_region(efi_memory_desc_t *md) -{ - u64 start_pfn, end_pfn, end; - unsigned long size; - void *va; - - start_pfn = PFN_DOWN(md->phys_addr); - size = md->num_pages << PAGE_SHIFT; - end = md->phys_addr + size; - end_pfn = PFN_UP(end); - - if (pfn_range_is_mapped(start_pfn, end_pfn)) { - va = __va(md->phys_addr); - - if (!(md->attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)va, size); - } else - va = efi_ioremap(md->phys_addr, size, - md->type, md->attribute); - - md->virt_addr = (u64) (unsigned long) va; - if (!va) - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); -} - -#endif - /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 826ead67753da5..e06a199423c0fe 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -29,9 +29,35 @@ #include #include #include +#include #include #include +void __init efi_map_region(efi_memory_desc_t *md) +{ + u64 start_pfn, end_pfn, end; + unsigned long size; + void *va; + + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); + + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); + + if (!(md->attribute & EFI_MEMORY_WB)) + set_memory_uc((unsigned long)va, md->num_pages); + } else { + va = ioremap_cache(md->phys_addr, size); + } + + md->virt_addr = (unsigned long)va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", md->phys_addr); +} + /* * To make EFI call EFI runtime service in physical addressing mode we need * prolog/epilog before/after the invocation to claim the EFI runtime service @@ -58,11 +84,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; } -void __init efi_map_region(efi_memory_desc_t *md) -{ - old_map_region(md); -} - void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} @@ -107,6 +128,15 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, void __init efi_runtime_update_mappings(void) { - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); + if (__supported_pte_mask & _PAGE_NX) { + efi_memory_desc_t *md; + + /* Make EFI runtime service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; + + set_memory_x(md->virt_addr, md->num_pages); + } + } } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 413583f904a695..6af4da1149bacf 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -259,6 +259,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; rodata = __pa(__start_rodata); pfn = rodata >> PAGE_SHIFT; + + pf = _PAGE_NX | _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { pr_err("Failed to map kernel rodata 1:1\n"); return 1; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 5860ca41185cf1..2acd9f9284a260 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1710,9 +1710,9 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, if (uncorrected_error) { core_err_cnt = 1; if (ripv) - tp_event = HW_EVENT_ERR_FATAL; - else tp_event = HW_EVENT_ERR_UNCORRECTED; + else + tp_event = HW_EVENT_ERR_FATAL; } else { tp_event = HW_EVENT_ERR_CORRECTED; } diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index fd363746f5b03a..b8fc4b84fd8672 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1155,7 +1155,7 @@ static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, u32 optypenum = GET_BITFIELD(m->status, 4, 6); int rc; - tp_event = uc_err ? (ripv ? HW_EVENT_ERR_FATAL : HW_EVENT_ERR_UNCORRECTED) : + tp_event = uc_err ? (ripv ? HW_EVENT_ERR_UNCORRECTED : HW_EVENT_ERR_FATAL) : HW_EVENT_ERR_CORRECTED; /* diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index d414698ca32421..c5ab634cb6a49a 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2982,9 +2982,9 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, if (uncorrected_error) { core_err_cnt = 1; if (ripv) { - tp_event = HW_EVENT_ERR_FATAL; - } else { tp_event = HW_EVENT_ERR_UNCORRECTED; + } else { + tp_event = HW_EVENT_ERR_FATAL; } } else { tp_event = HW_EVENT_ERR_CORRECTED; diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 6d8d6dc626bfee..2b4ce8e5ac2fa6 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -493,9 +493,9 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, if (uncorrected_error) { core_err_cnt = 1; if (ripv) { - tp_event = HW_EVENT_ERR_FATAL; - } else { tp_event = HW_EVENT_ERR_UNCORRECTED; + } else { + tp_event = HW_EVENT_ERR_FATAL; } } else { tp_event = HW_EVENT_ERR_CORRECTED; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index fdd1db025dbfda..3aa07c3b513697 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -381,6 +381,7 @@ static int __init efisubsys_init(void) efi_kobj = kobject_create_and_add("efi", firmware_kobj); if (!efi_kobj) { pr_err("efi: Firmware registration failed.\n"); + destroy_workqueue(efi_rts_wq); return -ENOMEM; } @@ -424,6 +425,7 @@ static int __init efisubsys_init(void) generic_ops_unregister(); err_put: kobject_put(efi_kobj); + destroy_workqueue(efi_rts_wq); return error; } diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 6bca70bbb43d0d..f735db55adc037 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -187,20 +187,28 @@ int efi_printk(const char *fmt, ...) */ efi_status_t efi_parse_options(char const *cmdline) { - size_t len = strlen(cmdline) + 1; + size_t len; efi_status_t status; char *str, *buf; + if (!cmdline) + return EFI_SUCCESS; + + len = strnlen(cmdline, COMMAND_LINE_SIZE - 1) + 1; status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, len, (void **)&buf); if (status != EFI_SUCCESS) return status; - str = skip_spaces(memcpy(buf, cmdline, len)); + memcpy(buf, cmdline, len - 1); + buf[len - 1] = '\0'; + str = skip_spaces(buf); while (*str) { char *param, *val; str = next_arg(str, ¶m, &val); + if (!val && !strcmp(param, "--")) + break; if (!strcmp(param, "nokaslr")) { efi_nokaslr = true; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 415a37e44caeb7..c5d3032dd1a215 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2948,6 +2948,9 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (bond_time_in_interval(bond, last_rx, 1)) { bond_propose_link_state(slave, BOND_LINK_UP); commit++; + } else if (slave->link == BOND_LINK_BACK) { + bond_propose_link_state(slave, BOND_LINK_FAIL); + commit++; } continue; } @@ -3056,6 +3059,19 @@ static void bond_ab_arp_commit(struct bonding *bond) continue; + case BOND_LINK_FAIL: + bond_set_slave_link_state(slave, BOND_LINK_FAIL, + BOND_SLAVE_NOTIFY_NOW); + bond_set_slave_inactive_flags(slave, + BOND_SLAVE_NOTIFY_NOW); + + /* A slave has just been enslaved and has become + * the current active slave. + */ + if (rtnl_dereference(bond->curr_active_slave)) + RCU_INIT_POINTER(bond->current_arp_slave, NULL); + continue; + default: slave_err(bond->dev, slave->dev, "impossible: link_new_state %d on slave\n", @@ -3106,8 +3122,6 @@ static bool bond_ab_arp_probe(struct bonding *bond) return should_notify_rtnl; } - bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER); - bond_for_each_slave_rcu(bond, slave, iter) { if (!found && !before && bond_slave_is_up(slave)) before = slave; diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 6500179c2ca22e..0837ae0e0c5e21 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1554,6 +1554,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, return ret; switch (ret) { + case -ETIMEDOUT: + return ret; case -ENOSPC: dev_dbg(dev->dev, "{%pM,%.4d} no space left in ARL\n", addr, vid); diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig index f121619d81fef4..2d23ccef7d0edb 100644 --- a/drivers/net/dsa/ocelot/Kconfig +++ b/drivers/net/dsa/ocelot/Kconfig @@ -9,7 +9,7 @@ config NET_DSA_MSCC_FELIX select NET_DSA_TAG_OCELOT select FSL_ENETC_MDIO help - This driver supports network switches from the the Vitesse / + This driver supports network switches from the Vitesse / Microsemi / Microchip Ocelot family of switching cores that are connected to their host CPU via Ethernet. The following switches are supported: diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 2a6c9725e09225..a3a8edf9a734d0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2180,13 +2180,10 @@ static void ena_del_napi_in_range(struct ena_adapter *adapter, int i; for (i = first_index; i < first_index + count; i++) { - /* Check if napi was initialized before */ - if (!ENA_IS_XDP_INDEX(adapter, i) || - adapter->ena_napi[i].xdp_ring) - netif_napi_del(&adapter->ena_napi[i].napi); - else - WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && - adapter->ena_napi[i].xdp_ring); + netif_napi_del(&adapter->ena_napi[i].napi); + + WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) && + adapter->ena_napi[i].xdp_ring); } } @@ -3601,16 +3598,14 @@ static void ena_fw_reset_device(struct work_struct *work) { struct ena_adapter *adapter = container_of(work, struct ena_adapter, reset_task); - struct pci_dev *pdev = adapter->pdev; - if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { - dev_err(&pdev->dev, - "device reset schedule while reset bit is off\n"); - return; - } rtnl_lock(); - ena_destroy_device(adapter, false); - ena_restore_device(adapter); + + if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + ena_destroy_device(adapter, false); + ena_restore_device(adapter); + } + rtnl_unlock(); } @@ -3692,7 +3687,7 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, } u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.missed_tx = missed_tx; + tx_ring->tx_stats.missed_tx += missed_tx; u64_stats_update_end(&tx_ring->syncp); return rc; @@ -4389,8 +4384,11 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) netdev->rx_cpu_rmap = NULL; } #endif /* CONFIG_RFS_ACCEL */ - del_timer_sync(&adapter->timer_service); + /* Make sure timer and reset routine won't be called after + * freeing device resources. + */ + del_timer_sync(&adapter->timer_service); cancel_work_sync(&adapter->reset_task); rtnl_lock(); /* lock released inside the below if-else block */ @@ -4558,6 +4556,9 @@ static void ena_keep_alive_wd(void *adapter_data, tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low; u64_stats_update_begin(&adapter->syncp); + /* These stats are accumulated by the device, so the counters indicate + * all drops since last reset. + */ adapter->dev_stats.rx_drops = rx_drops; adapter->dev_stats.tx_drops = tx_drops; u64_stats_update_end(&adapter->syncp); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 16a944707ba908..8941ac4df9e375 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -1631,8 +1631,8 @@ static int hw_atl_b0_get_mac_temp(struct aq_hw_s *self, u32 *temp) hw_atl_ts_reset_set(self, 0); } - err = readx_poll_timeout_atomic(hw_atl_b0_ts_ready_and_latch_high_get, - self, val, val == 1, 10000U, 500000U); + err = readx_poll_timeout(hw_atl_b0_ts_ready_and_latch_high_get, self, + val, val == 1, 10000U, 500000U); if (err) return err; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index d2b587d1670aa6..869431a1eedd48 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2553,19 +2553,22 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) pkt_len = ETH_HLEN + sizeof(CXGB4_SELFTEST_LB_STR); - flits = DIV_ROUND_UP(pkt_len + sizeof(struct cpl_tx_pkt) + - sizeof(*wr), sizeof(__be64)); + flits = DIV_ROUND_UP(pkt_len + sizeof(*cpl) + sizeof(*wr), + sizeof(__be64)); ndesc = flits_to_desc(flits); lb = &pi->ethtool_lb; lb->loopback = 1; q = &adap->sge.ethtxq[pi->first_qset]; + __netif_tx_lock(q->txq, smp_processor_id()); reclaim_completed_tx(adap, &q->q, -1, true); credits = txq_avail(&q->q) - ndesc; - if (unlikely(credits < 0)) + if (unlikely(credits < 0)) { + __netif_tx_unlock(q->txq); return -ENOMEM; + } wr = (void *)&q->q.desc[q->q.pidx]; memset(wr, 0, sizeof(struct tx_desc)); @@ -2598,6 +2601,7 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) init_completion(&lb->completion); txq_advance(&q->q, ndesc); cxgb4_ring_tx_db(adap, &q->q, ndesc); + __netif_tx_unlock(q->txq); /* wait for the pkt to return */ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 66e67b24a887c5..62e271aea4a500 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2389,7 +2389,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) dev_info(dev, "probe %s ID %d\n", dev_name(dev), id); - netdev = alloc_etherdev_mq(sizeof(*port), TX_QUEUE_NUM); + netdev = devm_alloc_etherdev_mqs(dev, sizeof(*port), TX_QUEUE_NUM, TX_QUEUE_NUM); if (!netdev) { dev_err(dev, "Can't allocate ethernet device #%d\n", id); return -ENOMEM; @@ -2521,7 +2521,6 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) } port->netdev = NULL; - free_netdev(netdev); return ret; } @@ -2530,7 +2529,6 @@ static int gemini_ethernet_port_remove(struct platform_device *pdev) struct gemini_ethernet_port *port = platform_get_drvdata(pdev); gemini_port_remove(port); - free_netdev(port->netdev); return 0; } diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index b513b8c5c3b5e5..41dd3d0f345244 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -750,8 +750,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) continue; err = gfar_parse_group(child, priv, model); - if (err) + if (err) { + of_node_put(child); goto err_grp_init; + } } } else { /* SQ_SG_MODE */ err = gfar_parse_group(np, priv, model); diff --git a/drivers/net/ethernet/sfc/ef100.c b/drivers/net/ethernet/sfc/ef100.c index 9729983f48409d..c54b7f8243f3ba 100644 --- a/drivers/net/ethernet/sfc/ef100.c +++ b/drivers/net/ethernet/sfc/ef100.c @@ -142,7 +142,7 @@ static int ef100_pci_parse_continue_entry(struct efx_nic *efx, int entry_locatio /* Temporarily map new BAR. */ rc = efx_init_io(efx, bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, bar)); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -160,7 +160,7 @@ static int ef100_pci_parse_continue_entry(struct efx_nic *efx, int entry_locatio /* Put old BAR back. */ rc = efx_init_io(efx, previous_bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, previous_bar)); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -334,7 +334,7 @@ static int ef100_pci_parse_xilinx_cap(struct efx_nic *efx, int vndr_cap, /* Temporarily map BAR. */ rc = efx_init_io(efx, bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, bar)); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -495,7 +495,7 @@ static int ef100_pci_probe(struct pci_dev *pci_dev, /* Set up basic I/O (BAR mappings etc) */ rc = efx_init_io(efx, fcw.bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, fcw.bar)); if (rc) goto fail; diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 206d70f9d95b0a..19fe86b3b3169b 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -431,18 +431,18 @@ static int ef100_reset(struct efx_nic *efx, enum reset_type reset_type) /* A RESET_TYPE_ALL will cause filters to be removed, so we remove filters * and reprobe after reset to avoid removing filters twice */ - down_read(&efx->filter_sem); + down_write(&efx->filter_sem); ef100_filter_table_down(efx); - up_read(&efx->filter_sem); + up_write(&efx->filter_sem); rc = efx_mcdi_reset(efx, reset_type); if (rc) return rc; netif_device_attach(efx->net_dev); - down_read(&efx->filter_sem); + down_write(&efx->filter_sem); rc = ef100_filter_table_up(efx); - up_read(&efx->filter_sem); + up_write(&efx->filter_sem); if (rc) return rc; @@ -739,6 +739,7 @@ const struct efx_nic_type ef100_pf_nic_type = { .rx_remove = efx_mcdi_rx_remove, .rx_write = ef100_rx_write, .rx_packet = __ef100_rx_packet, + .rx_buf_hash_valid = ef100_rx_buf_hash_valid, .fini_dmaq = efx_fini_dmaq, .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, .filter_table_probe = ef100_filter_table_up, @@ -820,6 +821,7 @@ const struct efx_nic_type ef100_vf_nic_type = { .rx_remove = efx_mcdi_rx_remove, .rx_write = ef100_rx_write, .rx_packet = __ef100_rx_packet, + .rx_buf_hash_valid = ef100_rx_buf_hash_valid, .fini_dmaq = efx_fini_dmaq, .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, .filter_table_probe = ef100_filter_table_up, diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index dcb741d8bd119c..062462a1384756 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -846,6 +846,7 @@ struct efx_async_filter_insertion { * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds * @timer_max_ns: Interrupt timer maximum value, in nanoseconds * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues + * @irqs_hooked: Channel interrupts are hooked * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues * @irq_rx_moderation_us: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags @@ -1004,6 +1005,7 @@ struct efx_nic { unsigned int timer_quantum_ns; unsigned int timer_max_ns; bool irq_rx_adaptive; + bool irqs_hooked; unsigned int irq_mod_step_us; unsigned int irq_rx_moderation_us; u32 msg_enable; diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index d994d136bb0380..d1e908846f5dd9 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -129,6 +129,7 @@ int efx_nic_init_interrupt(struct efx_nic *efx) #endif } + efx->irqs_hooked = true; return 0; fail2: @@ -154,6 +155,8 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) efx->net_dev->rx_cpu_rmap = NULL; #endif + if (!efx->irqs_hooked) + return; if (EFX_INT_MODE_USE_MSI(efx)) { /* Disable MSI/MSI-X interrupts */ efx_for_each_channel(channel, efx) @@ -163,6 +166,7 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) /* Disable legacy interrupt */ free_irq(efx->legacy_irq, efx); } + efx->irqs_hooked = false; } /* Register dump */ diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index ef9bca92b0b792..5e29284c89c982 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -849,6 +849,7 @@ void efx_remove_filters(struct efx_nic *efx) efx_for_each_channel(channel, efx) { cancel_delayed_work_sync(&channel->filter_work); kfree(channel->rps_flow_id); + channel->rps_flow_id = NULL; } #endif down_write(&efx->filter_sem); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 787f17e2a97168..64b0a74c152314 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -367,7 +367,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, } rcu_read_unlock(); - while (unlikely(txq >= ndev->real_num_tx_queues)) + while (txq >= ndev->real_num_tx_queues) txq -= ndev->real_num_tx_queues; return txq; @@ -502,7 +502,7 @@ static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev, int rc; skb->dev = vf_netdev; - skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; + skb_record_rx_queue(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); rc = dev_queue_xmit(skb); if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) { diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4942f6112e51f8..5da04e99798943 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1269,6 +1269,9 @@ static void macvlan_port_destroy(struct net_device *dev) static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + struct nlattr *nla, *head; + int rem, len; + if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; @@ -1316,6 +1319,20 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return -EADDRNOTAVAIL; } + if (data[IFLA_MACVLAN_MACADDR_DATA]) { + head = nla_data(data[IFLA_MACVLAN_MACADDR_DATA]); + len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]); + + nla_for_each_attr(nla, head, len, rem) { + if (nla_type(nla) != IFLA_MACVLAN_MACADDR || + nla_len(nla) != ETH_ALEN) + return -EINVAL; + + if (!is_valid_ether_addr(nla_data(nla))) + return -EADDRNOTAVAIL; + } + } + if (data[IFLA_MACVLAN_MACADDR_COUNT]) return -EINVAL; @@ -1372,10 +1389,6 @@ static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode, len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]); nla_for_each_attr(nla, head, len, rem) { - if (nla_type(nla) != IFLA_MACVLAN_MACADDR || - nla_len(nla) != ETH_ALEN) - continue; - addr = nla_data(nla); ret = macvlan_hash_add_source(vlan, addr); if (ret) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index a4fbf3a4fa9739..6bc7406a1ce731 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -1738,13 +1738,13 @@ static int __phy_write_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb, return 0; } -/* Trigger a read to the spcified MCB */ +/* Trigger a read to the specified MCB */ static int phy_update_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) { return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_READ); } -/* Trigger a write to the spcified MCB */ +/* Trigger a write to the specified MCB */ static int phy_commit_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) { return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_WRITE); diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index 73aaae5574ed56..e020faff7da531 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -142,16 +142,15 @@ static int idtcm_strverscmp(const char *ver1, const char *ver2) return result; } -static int idtcm_xfer(struct idtcm *idtcm, - u8 regaddr, - u8 *buf, - u16 count, - bool write) +static int idtcm_xfer_read(struct idtcm *idtcm, + u8 regaddr, + u8 *buf, + u16 count) { struct i2c_client *client = idtcm->client; struct i2c_msg msg[2]; int cnt; - char *fmt = "i2c_transfer failed at %d in %s for %s, at addr: %04X!\n"; + char *fmt = "i2c_transfer failed at %d in %s, at addr: %04X!\n"; msg[0].addr = client->addr; msg[0].flags = 0; @@ -159,7 +158,7 @@ static int idtcm_xfer(struct idtcm *idtcm, msg[0].buf = ®addr; msg[1].addr = client->addr; - msg[1].flags = write ? 0 : I2C_M_RD; + msg[1].flags = I2C_M_RD; msg[1].len = count; msg[1].buf = buf; @@ -170,7 +169,6 @@ static int idtcm_xfer(struct idtcm *idtcm, fmt, __LINE__, __func__, - write ? "write" : "read", regaddr); return cnt; } else if (cnt != 2) { @@ -182,6 +180,37 @@ static int idtcm_xfer(struct idtcm *idtcm, return 0; } +static int idtcm_xfer_write(struct idtcm *idtcm, + u8 regaddr, + u8 *buf, + u16 count) +{ + struct i2c_client *client = idtcm->client; + /* we add 1 byte for device register */ + u8 msg[IDTCM_MAX_WRITE_COUNT + 1]; + int cnt; + char *fmt = "i2c_master_send failed at %d in %s, at addr: %04X!\n"; + + if (count > IDTCM_MAX_WRITE_COUNT) + return -EINVAL; + + msg[0] = regaddr; + memcpy(&msg[1], buf, count); + + cnt = i2c_master_send(client, msg, count + 1); + + if (cnt < 0) { + dev_err(&client->dev, + fmt, + __LINE__, + __func__, + regaddr); + return cnt; + } + + return 0; +} + static int idtcm_page_offset(struct idtcm *idtcm, u8 val) { u8 buf[4]; @@ -195,7 +224,7 @@ static int idtcm_page_offset(struct idtcm *idtcm, u8 val) buf[2] = 0x10; buf[3] = 0x20; - err = idtcm_xfer(idtcm, PAGE_ADDR, buf, sizeof(buf), 1); + err = idtcm_xfer_write(idtcm, PAGE_ADDR, buf, sizeof(buf)); if (err) { idtcm->page_offset = 0xff; @@ -223,11 +252,12 @@ static int _idtcm_rdwr(struct idtcm *idtcm, err = idtcm_page_offset(idtcm, hi); if (err) - goto out; + return err; - err = idtcm_xfer(idtcm, lo, buf, count, write); -out: - return err; + if (write) + return idtcm_xfer_write(idtcm, lo, buf, count); + + return idtcm_xfer_read(idtcm, lo, buf, count); } static int idtcm_read(struct idtcm *idtcm, diff --git a/drivers/ptp/ptp_clockmatrix.h b/drivers/ptp/ptp_clockmatrix.h index ffae56c5d97fab..82840d72364adb 100644 --- a/drivers/ptp/ptp_clockmatrix.h +++ b/drivers/ptp/ptp_clockmatrix.h @@ -55,6 +55,8 @@ #define PEROUT_ENABLE_OUTPUT_MASK (0xdeadbeef) +#define IDTCM_MAX_WRITE_COUNT (512) + /* Values of DPLL_N.DPLL_MODE.PLL_MODE */ enum pll_mode { PLL_MODE_MIN = 0, diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 12eebcdea9c8a2..e0decff22ae273 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1994,9 +1994,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) * not already there, and calling reverse_path_check() * during ep_insert(). */ - if (list_empty(&epi->ffd.file->f_tfile_llink)) + if (list_empty(&epi->ffd.file->f_tfile_llink)) { + get_file(epi->ffd.file); list_add(&epi->ffd.file->f_tfile_llink, &tfile_check_list); + } } } mutex_unlock(&ep->mtx); @@ -2040,6 +2042,7 @@ static void clear_tfile_check_list(void) file = list_first_entry(&tfile_check_list, struct file, f_tfile_llink); list_del_init(&file->f_tfile_llink); + fput(file); } INIT_LIST_HEAD(&tfile_check_list); } @@ -2200,25 +2203,22 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, full_check = 1; if (is_file_epoll(tf.file)) { error = -ELOOP; - if (ep_loop_check(ep, tf.file) != 0) { - clear_tfile_check_list(); + if (ep_loop_check(ep, tf.file) != 0) goto error_tgt_fput; - } - } else + } else { + get_file(tf.file); list_add(&tf.file->f_tfile_llink, &tfile_check_list); + } error = epoll_mutex_lock(&ep->mtx, 0, nonblock); - if (error) { -out_del: - list_del(&tf.file->f_tfile_llink); + if (error) goto error_tgt_fput; - } if (is_file_epoll(tf.file)) { tep = tf.file->private_data; error = epoll_mutex_lock(&tep->mtx, 1, nonblock); if (error) { mutex_unlock(&ep->mtx); - goto out_del; + goto error_tgt_fput; } } } @@ -2239,8 +2239,6 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = ep_insert(ep, epds, tf.file, fd, full_check); } else error = -EEXIST; - if (full_check) - clear_tfile_check_list(); break; case EPOLL_CTL_DEL: if (epi) @@ -2263,8 +2261,10 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, mutex_unlock(&ep->mtx); error_tgt_fput: - if (full_check) + if (full_check) { + clear_tfile_check_list(); mutex_unlock(&epmutex); + } fdput(tf); error_fput: diff --git a/include/net/addrconf.h b/include/net/addrconf.h index ba3f6c15ad2b5e..18f783dcd55fa3 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -97,7 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr, int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); -struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr); +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, + struct net_device *dev); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0480f893facd27..b6238b2209b71a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -767,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1033,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1147,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b6715964b68561..8faa2ce893968f 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -67,6 +67,9 @@ static void bpf_iter_done_stop(struct seq_file *seq) iter_priv->done_stop = true; } +/* maximum visited objects before bailing out */ +#define MAX_ITER_OBJECTS 1000000 + /* bpf_seq_read, a customized and simpler version for bpf iterator. * no_llseek is assumed for this file. * The following are differences from seq_read(): @@ -79,7 +82,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, { struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; - int err = 0; + int err = 0, num_objs = 0; void *p; mutex_lock(&seq->lock); @@ -135,6 +138,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, while (1) { loff_t pos = seq->index; + num_objs++; offs = seq->count; p = seq->op->next(seq, p, &seq->index); if (pos == seq->index) { @@ -153,6 +157,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, if (seq->count >= size) break; + if (num_objs >= MAX_ITER_OBJECTS) { + if (offs == 0) { + err = -EAGAIN; + seq->op->stop(seq, p); + goto done; + } + break; + } + err = seq->op->show(seq, p); if (err > 0) { bpf_iter_dec_seq_num(seq); diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 232df29793e962..99af4cea110253 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -29,8 +29,9 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns, rcu_read_lock(); retry: - pid = idr_get_next(&ns->idr, tid); + pid = find_ge_pid(*tid, ns); if (pid) { + *tid = pid_nr_ns(pid, ns); task = get_pid_task(pid, PIDTYPE_PID); if (!task) { ++*tid; @@ -178,10 +179,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, f = fcheck_files(curr_files, curr_fd); if (!f) continue; + if (!get_file_rcu(f)) + continue; /* set info->fd */ info->fd = curr_fd; - get_file(f); rcu_read_unlock(); return f; } diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 9852e0d62d95cc..fcae019158cad4 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -65,7 +65,8 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, syscall_enter_audit(regs, syscall); - return ret ? : syscall; + /* The above might have changed the syscall number */ + return ret ? : syscall_get_nr(current, regs); } noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) diff --git a/net/core/dev.c b/net/core/dev.c index b5d1129d83103b..d42c9ea0c3c037 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8742,13 +8742,15 @@ struct bpf_xdp_link { int flags; }; -static enum bpf_xdp_mode dev_xdp_mode(u32 flags) +static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags) { if (flags & XDP_FLAGS_HW_MODE) return XDP_MODE_HW; if (flags & XDP_FLAGS_DRV_MODE) return XDP_MODE_DRV; - return XDP_MODE_SKB; + if (flags & XDP_FLAGS_SKB_MODE) + return XDP_MODE_SKB; + return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB; } static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) @@ -8896,7 +8898,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EINVAL; } - mode = dev_xdp_mode(flags); + mode = dev_xdp_mode(dev, flags); /* can't replace attached link */ if (dev_xdp_link(dev, mode)) { NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); @@ -8984,7 +8986,7 @@ static int dev_xdp_detach_link(struct net_device *dev, ASSERT_RTNL(); - mode = dev_xdp_mode(link->flags); + mode = dev_xdp_mode(dev, link->flags); if (dev_xdp_link(dev, mode) != link) return -EINVAL; @@ -9080,7 +9082,7 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } - mode = dev_xdp_mode(xdp_link->flags); + mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, xdp_link->flags, new_prog); @@ -9164,7 +9166,7 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags) { - enum bpf_xdp_mode mode = dev_xdp_mode(flags); + enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags); struct bpf_prog *new_prog = NULL, *old_prog = NULL; int err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5c3b906aeef3dc..e18184ffa9c3fa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5987,9 +5987,13 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - if (k == 0) { - /* split line is in frag list */ - pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask); + /* split line is in frag list */ + if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) { + /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */ + if (skb_has_frag_list(skb)) + kfree_skb_list(skb_shinfo(skb)->frag_list); + kfree(data); + return -ENOMEM; } skb_release_data(skb); diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 4e632dc987d859..495635f152ba68 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -224,7 +224,9 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) DECLARE_BITMAP(wanted_diff_mask, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(active_diff_mask, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(old_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(old_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(new_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(new_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(req_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(req_mask, NETDEV_FEATURE_COUNT); struct nlattr *tb[ETHTOOL_A_FEATURES_MAX + 1]; @@ -250,6 +252,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); ethnl_features_to_bitmap(old_active, dev->features); + ethnl_features_to_bitmap(old_wanted, dev->wanted_features); ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, tb[ETHTOOL_A_FEATURES_WANTED], netdev_features_strings, info->extack); @@ -261,17 +264,15 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; } - /* set req_wanted bits not in req_mask from old_active */ + /* set req_wanted bits not in req_mask from old_wanted */ bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT); - bitmap_andnot(new_active, old_active, req_mask, NETDEV_FEATURE_COUNT); - bitmap_or(req_wanted, new_active, req_wanted, NETDEV_FEATURE_COUNT); - if (bitmap_equal(req_wanted, old_active, NETDEV_FEATURE_COUNT)) { - ret = 0; - goto out_rtnl; + bitmap_andnot(new_wanted, old_wanted, req_mask, NETDEV_FEATURE_COUNT); + bitmap_or(req_wanted, new_wanted, req_wanted, NETDEV_FEATURE_COUNT); + if (!bitmap_equal(req_wanted, old_wanted, NETDEV_FEATURE_COUNT)) { + dev->wanted_features &= ~dev->hw_features; + dev->wanted_features |= ethnl_bitmap_to_features(req_wanted) & dev->hw_features; + __netdev_update_features(dev); } - - dev->wanted_features = ethnl_bitmap_to_features(req_wanted); - __netdev_update_features(dev); ethnl_features_to_bitmap(new_active, dev->features); mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 60db5a6487cc5f..87983e70f03f32 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -661,13 +661,13 @@ config TCP_CONG_BBR BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to maximize network utilization and minimize queues. It builds an explicit - model of the the bottleneck delivery rate and path round-trip - propagation delay. It tolerates packet loss and delay unrelated to - congestion. It can operate over LAN, WAN, cellular, wifi, or cable - modem links. It can coexist with flows that use loss-based congestion - control, and can operate with shallow buffers, deep buffers, - bufferbloat, policers, or AQM schemes that do not provide a delay - signal. It requires the fq ("Fair Queue") pacing packet scheduler. + model of the bottleneck delivery rate and path round-trip propagation + delay. It tolerates packet loss and delay unrelated to congestion. It + can operate over LAN, WAN, cellular, wifi, or cable modem links. It can + coexist with flows that use loss-based congestion control, and can + operate with shallow buffers, deep buffers, bufferbloat, policers, or + AQM schemes that do not provide a delay signal. It requires the fq + ("Fair Queue") pacing packet scheduler. choice prompt "Default TCP congestion control" diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index cc8049b100b241..134e923822750a 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -446,7 +446,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], unsigned int i, j; u8 nhg_fdb = 0; - if (len & (sizeof(struct nexthop_grp) - 1)) { + if (!len || len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, "Invalid length for nexthop group attribute"); return -EINVAL; @@ -1187,6 +1187,9 @@ static struct nexthop *nexthop_create_group(struct net *net, struct nexthop *nh; int i; + if (WARN_ON(!num_nh)) + return ERR_PTR(-EINVAL); + nh = nexthop_alloc(); if (!nh) return ERR_PTR(-ENOMEM); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8e761b8c47c690..01146b66d6669f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1893,12 +1893,13 @@ EXPORT_SYMBOL(ipv6_chk_addr); * 2. does the address exist on the specific device * (skip_dev_check = false) */ -int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, bool skip_dev_check, - int strict, u32 banned_flags) +static struct net_device * +__ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags) { unsigned int hash = inet6_addr_hash(net, addr); - const struct net_device *l3mdev; + struct net_device *l3mdev, *ndev; struct inet6_ifaddr *ifp; u32 ifp_flags; @@ -1909,10 +1910,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, dev = NULL; hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) + ndev = ifp->idev->dev; + if (!net_eq(dev_net(ndev), net)) continue; - if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev) + if (l3mdev_master_dev_rcu(ndev) != l3mdev) continue; /* Decouple optimistic from tentative for evaluation here. @@ -1923,15 +1925,23 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp_flags&banned_flags) && - (!dev || ifp->idev->dev == dev || + (!dev || ndev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock(); - return 1; + return ndev; } } rcu_read_unlock(); - return 0; + return NULL; +} + +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags) +{ + return __ipv6_chk_addr_and_flags(net, addr, dev, skip_dev_check, + strict, banned_flags) ? 1 : 0; } EXPORT_SYMBOL(ipv6_chk_addr_and_flags); @@ -1990,35 +2000,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix); * * The caller should be protected by RCU, or RTNL. */ -struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr) +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, + struct net_device *dev) { - unsigned int hash = inet6_addr_hash(net, addr); - struct inet6_ifaddr *ifp, *result = NULL; - struct net_device *dev = NULL; - - rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (net_eq(dev_net(ifp->idev->dev), net) && - ipv6_addr_equal(&ifp->addr, addr)) { - result = ifp; - break; - } - } - - if (!result) { - struct rt6_info *rt; - - rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); - if (rt) { - dev = rt->dst.dev; - ip6_rt_put(rt); - } - } else { - dev = result->idev->dev; - } - rcu_read_unlock(); - - return dev; + return __ipv6_chk_addr_and_flags(net, addr, dev, !dev, 1, + IFA_F_TENTATIVE); } EXPORT_SYMBOL(ipv6_dev_find); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f635914f42ec60..a0217e5bf3bc11 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -915,7 +915,15 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, struct metadata_dst *tun_dst, bool log_ecn_err) { - return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb); + + dscp_ecn_decapsulate = ip6ip6_dscp_ecn_decapsulate; + if (tpi->proto == htons(ETH_P_IP)) + dscp_ecn_decapsulate = ip4ip6_dscp_ecn_decapsulate; + + return __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_err); } EXPORT_SYMBOL(ip6_tnl_rcv); diff --git a/net/netlink/policy.c b/net/netlink/policy.c index f6491853c79715..2b3e26f7496f5f 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -51,6 +51,9 @@ static int add_policy(struct nl_policy_dump **statep, if (!state) return -ENOMEM; + memset(&state->policies[state->n_alloc], 0, + flex_array_size(state, policies, n_alloc - state->n_alloc)); + state->policies[state->n_alloc].policy = policy; state->policies[state->n_alloc].maxtype = maxtype; state->n_alloc = n_alloc; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e6ad42b11835f8..2c3619165680fd 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -704,7 +704,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, err = ip_defrag(net, skb, user); local_bh_enable(); if (err && err != -EINPROGRESS) - goto out_free; + return err; if (!err) { *defrag = true; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index bda2536dd740f7..6dc95dcc0ff4f0 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -88,12 +88,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, int ret; if (outcnt <= stream->outcnt) - return 0; + goto out; ret = genradix_prealloc(&stream->out, outcnt, gfp); if (ret) return ret; +out: stream->outcnt = outcnt; return 0; } @@ -104,12 +105,13 @@ static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, int ret; if (incnt <= stream->incnt) - return 0; + goto out; ret = genradix_prealloc(&stream->in, incnt, gfp); if (ret) return ret; +out: stream->incnt = incnt; return 0; } diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index e1f64f4ba23614..da9ba6d1679b78 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -170,13 +170,15 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_connection *conn = &smc->conn; - struct smcd_diag_dmbinfo dinfo = { - .linkid = *((u32 *)conn->lgr->id), - .peer_gid = conn->lgr->peer_gid, - .my_gid = conn->lgr->smcd->local_gid, - .token = conn->rmb_desc->token, - .peer_token = conn->peer_token - }; + struct smcd_diag_dmbinfo dinfo; + + memset(&dinfo, 0, sizeof(dinfo)); + + dinfo.linkid = *((u32 *)conn->lgr->id); + dinfo.peer_gid = conn->lgr->peer_gid; + dinfo.my_gid = conn->lgr->smcd->local_gid; + dinfo.token = conn->rmb_desc->token; + dinfo.peer_token = conn->peer_token; if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) goto errout; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 001bcb0f2480dc..c38babaa4e579b 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -757,10 +757,12 @@ static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err) switch (err) { case 0: this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]); + rcu_read_lock(); if (likely(test_bit(0, &b->up))) b->media->send_msg(net, skb, b, &tx_ctx->dst); else kfree_skb(skb); + rcu_read_unlock(); break; case -EINPROGRESS: return; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 53f0de0676b7a5..911d13cd2e675a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -660,6 +660,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; u8 node_id[NODE_ID_LEN] = {0,}; + struct net_device *dev; int rmcast = 0; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); @@ -714,8 +715,6 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, rcu_assign_pointer(ub->bearer, b); tipc_udp_media_addr_set(&b->addr, &local); if (local.proto == htons(ETH_P_IP)) { - struct net_device *dev; - dev = __ip_dev_find(net, local.ipv4.s_addr, false); if (!dev) { err = -ENODEV; @@ -738,9 +737,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { - struct net_device *dev; - - dev = ipv6_dev_find(net, &local.ipv6); + dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL; + dev = ipv6_dev_find(net, &local.ipv6, dev); if (!dev) { err = -ENODEV; goto err; diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index e3b116325403be..df7d8ec7603616 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -134,6 +134,8 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) while (true) { ret = read(fd, buf, sizeof(buf)); if (ret < 0) { + if (errno == EAGAIN) + continue; err = -errno; p_err("failed to read PID iterator output: %d", err); goto out; diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 4d9ecb97586224..0def0bb1f78327 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -233,6 +233,39 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, false); } +/* + * The data of compressed section should be aligned to 4 + * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld + * sets sh_addralign to 1, which makes libelf fail with + * misaligned section error during the update: + * FAILED elf_update(WRITE): invalid section alignment + * + * While waiting for ld fix, we fix the compressed sections + * sh_addralign value manualy. + */ +static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh) +{ + int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8; + + if (!(sh->sh_flags & SHF_COMPRESSED)) + return 0; + + if (sh->sh_addralign == expected) + return 0; + + pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n", + sh->sh_addralign, expected); + + sh->sh_addralign = expected; + + if (gelf_update_shdr(scn, sh) == 0) { + printf("FAILED cannot update section header: %s\n", + elf_errmsg(-1)); + return -1; + } + return 0; +} + static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; @@ -309,6 +342,9 @@ static int elf_collect(struct object *obj) obj->efile.idlist_shndx = idx; obj->efile.idlist_addr = sh.sh_addr; } + + if (compressed_section_fix(elf, scn, &sh)) + return -1; } return 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0480f893facd27..b6238b2209b71a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -767,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1033,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1147,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index fe39bd77469768..57c00fa6393290 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -879,7 +879,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, btf_dump_printf(d, ": %d", m_sz); off = m_off + m_sz; } else { - m_sz = max(0LL, btf__resolve_size(d->btf, m->type)); + m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); off = m_off + m_sz * 8; } btf_dump_printf(d, ";"); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d20b2da442713..0ad0b0491e1f2a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2264,7 +2264,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, data = elf_getdata(scn, NULL); if (!scn || !data) { pr_warn("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + obj->efile.btf_maps_shndx, MAPS_ELF_SEC); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1bb204cee853fc..9a0946ddb705ac 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -6,7 +6,6 @@ test_lpm_map test_tag FEATURE-DUMP.libbpf fixdep -test_align test_dev_cgroup /test_progs* test_tcpbpf_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a83b5827532f77..fc946b7ac288df 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ + test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 25b068591e9a40..193002b14d7f66 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -19,7 +19,7 @@ static int libbpf_debug_print(enum libbpf_print_level level, log_buf = va_arg(args, char *); if (!log_buf) goto out; - if (strstr(log_buf, err_str) == 0) + if (err_str && strstr(log_buf, err_str) == 0) found = true; out: printf(format, log_buf);