From 54fc95af760a6f5343592e62684f48ff465afd24 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:23:42 +0800 Subject: [PATCH 01/19] LoongArch: Use arch specific phys_to_dma To be compatible with OLD firmware which has no _DMA method, we should use arch specific phys_to_dma. Signed-off-by: Hongchen Zhang Signed-off-by: Binbin Zhou --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/dma-direct.h | 21 +++++++++++++++++++++ arch/loongarch/kernel/dma.c | 14 ++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 arch/loongarch/include/asm/dma-direct.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 3b4d487e0ae35..2b227f287e622 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -147,6 +147,7 @@ config LOONGARCH select HAVE_SETUP_PER_CPU_AREA if NUMA select HAVE_STACK_VALIDATION if HAVE_OBJTOOL select HAVE_STACKPROTECTOR + select ARCH_HAS_PHYS_TO_DMA select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP diff --git a/arch/loongarch/include/asm/dma-direct.h b/arch/loongarch/include/asm/dma-direct.h new file mode 100644 index 0000000000000..a8fbae3bd5dac --- /dev/null +++ b/arch/loongarch/include/asm/dma-direct.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_DMA_DIRECT_H +#define _ASM_LOONGARCH_DMA_DIRECT_H + +extern int node_id_offset; + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + long nid = (paddr >> 44) & 0xf; + + return ((nid << 44) ^ paddr) | (nid << node_id_offset); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + long nid = (daddr >> node_id_offset) & 0xf; + + return ((nid << node_id_offset) ^ daddr) | (nid << 44); +} + +#endif /* _ASM_LOONGARCH_DMA_DIRECT_H */ diff --git a/arch/loongarch/kernel/dma.c b/arch/loongarch/kernel/dma.c index 7a9c6a9dd2d01..34836408b15a3 100644 --- a/arch/loongarch/kernel/dma.c +++ b/arch/loongarch/kernel/dma.c @@ -4,6 +4,15 @@ */ #include #include +#include + +/* + * We extract 4bit node id (bit 44~47) from Loongson-3's + * 48bit physical address space and embed it into 40bit. + */ + +int node_id_offset; +EXPORT_SYMBOL_GPL(node_id_offset); void acpi_arch_dma_setup(struct device *dev) { @@ -11,6 +20,11 @@ void acpi_arch_dma_setup(struct device *dev) u64 mask, end = 0; const struct bus_dma_region *map = NULL; + if (node_id_offset == 0) { + node_id_offset = ((readl(LS7A_DMA_CFG) & LS7A_DMA_NODE_MASK) >> LS7A_DMA_NODE_SHF); + node_id_offset += 36; + } + ret = acpi_dma_get_range(dev, &map); if (!ret && map) { const struct bus_dma_region *r = map; From 106738ebe01e595af92b227d71bf05eadc82dda8 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:24:50 +0800 Subject: [PATCH 02/19] LoongArch: Change SHMLBA from SZ_64K to PAGE_SIZE commit d23b77953f5a4fbf94c05157b186aac2a247ae32 upstream. LoongArch has hardware page coloring for L1 Cache, so we don't have cache aliases. But SFB (Store Fill Buffer) still has aliases. So we define SHMLBA to SZ_64K previously. But there are losts of applications use PAGE_SIZE rather than SHMLBA to mmap() file pages and shared pages. Of course we can fix them one by one, but not easy. On the other hand, we can simply disable SFB for 4KB page size to fix cache alias (there will be performance decrease, but acceptable), and in future we will fix SFB in hardware. So we can safely define SHMLBA to PAGE_SIZE (use the generic shmparam.h) to make life easier. Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou --- arch/loongarch/include/asm/shmparam.h | 12 ------------ arch/loongarch/kernel/head.S | 10 ++++++++++ 2 files changed, 10 insertions(+), 12 deletions(-) delete mode 100644 arch/loongarch/include/asm/shmparam.h diff --git a/arch/loongarch/include/asm/shmparam.h b/arch/loongarch/include/asm/shmparam.h deleted file mode 100644 index c9554f48d2dfa..0000000000000 --- a/arch/loongarch/include/asm/shmparam.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_SHMPARAM_H -#define _ASM_SHMPARAM_H - -#define __ARCH_FORCE_SHMLBA 1 - -#define SHMLBA SZ_64K /* attach addr a multiple of this */ - -#endif /* _ASM_SHMPARAM_H */ diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index e336fbc4eb967..4677ea8fa8e98 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -74,6 +74,11 @@ SYM_CODE_START(kernel_entry) # kernel entry point la.pcrel t0, fw_arg2 st.d a2, t0, 0 +#ifdef CONFIG_PAGE_SIZE_4KB + li.d t0, 0 + li.d t1, CSR_STFILL + csrxchg t0, t1, LOONGARCH_CSR_IMPCTL1 +#endif /* KSave3 used for percpu base, initialized as 0 */ csrwr zero, PERCPU_BASE_KS /* GPR21 used for percpu base (runtime), initialized as 0 */ @@ -126,6 +131,11 @@ SYM_CODE_START(smpboot_entry) JUMP_VIRT_ADDR t0, t1 +#ifdef CONFIG_PAGE_SIZE_4KB + li.d t0, 0 + li.d t1, CSR_STFILL + csrxchg t0, t1, LOONGARCH_CSR_IMPCTL1 +#endif /* Enable PG */ li.w t0, 0xb0 # PLV=0, IE=0, PG=1 csrwr t0, LOONGARCH_CSR_CRMD From c9751369af958a8cacb58e023b8f2fd7dbb53f2f Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:27:35 +0800 Subject: [PATCH 03/19] LoongArch: Add writecombine support for DMW-based ioremap() commit 8e02c3b782ec64343f3cccc8dc5a8be2b379e80b upstream. Currently, only TLB-based ioremap() support writecombine, so add the counterpart for DMW-based ioremap() with help of DMW2. The base address (WRITECOMBINE_BASE) is configured as 0xa000000000000000. DMW3 is unused by kernel now, however firmware may leave garbage in them and interfere kernel's address mapping. So clear it as necessary. BTW, centralize the DMW configuration to macro SETUP_DMWINS. Signed-off-by: Jiaxun Yang Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou --- arch/loongarch/include/asm/addrspace.h | 4 ++++ arch/loongarch/include/asm/io.h | 10 ++++++++-- arch/loongarch/include/asm/loongarch.h | 10 +++++++++- arch/loongarch/include/asm/stackframe.h | 11 +++++++++++ arch/loongarch/kernel/head.S | 12 ++---------- arch/loongarch/power/suspend_asm.S | 6 +----- drivers/firmware/efi/libstub/loongarch.c | 2 ++ 7 files changed, 37 insertions(+), 18 deletions(-) diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index b24437e28c6ed..913981b835f4b 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -36,6 +36,10 @@ extern unsigned long vm_map_base; #define UNCACHE_BASE CSR_DMW0_BASE #endif +#ifndef WRITECOMBINE_BASE +#define WRITECOMBINE_BASE CSR_DMW2_BASE +#endif + #define DMW_PABITS 48 #define TO_PHYS_MASK ((1ULL << DMW_PABITS) - 1) diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 4a8adcca329b8..838db690b723a 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -30,10 +30,16 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long prot_val) { - if (prot_val & _CACHE_CC) + switch (prot_val & _CACHE_MASK) { + case _CACHE_CC: return (void __iomem *)(unsigned long)(CACHE_BASE + offset); - else + case _CACHE_SUC: return (void __iomem *)(unsigned long)(UNCACHE_BASE + offset); + case _CACHE_WUC: + return (void __iomem *)(unsigned long)(WRITECOMBINE_BASE + offset); + default: + return NULL; + } } #define ioremap(offset, size) \ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 647dddc57624a..74875d470f5c6 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -878,7 +878,7 @@ #define LOONGARCH_CSR_DMWIN2 0x182 /* 64 direct map win2: MEM */ #define LOONGARCH_CSR_DMWIN3 0x183 /* 64 direct map win3: MEM */ -/* Direct Map window 0/1 */ +/* Direct Map window 0/1/2/3 */ #define CSR_DMW0_PLV0 _CONST64_(1 << 0) #define CSR_DMW0_VSEG _CONST64_(0x8000) #define CSR_DMW0_BASE (CSR_DMW0_VSEG << DMW_PABITS) @@ -890,6 +890,14 @@ #define CSR_DMW1_BASE (CSR_DMW1_VSEG << DMW_PABITS) #define CSR_DMW1_INIT (CSR_DMW1_BASE | CSR_DMW1_MAT | CSR_DMW1_PLV0) +#define CSR_DMW2_PLV0 _CONST64_(1 << 0) +#define CSR_DMW2_MAT _CONST64_(2 << 4) +#define CSR_DMW2_VSEG _CONST64_(0xa000) +#define CSR_DMW2_BASE (CSR_DMW2_VSEG << DMW_PABITS) +#define CSR_DMW2_INIT (CSR_DMW2_BASE | CSR_DMW2_MAT | CSR_DMW2_PLV0) + +#define CSR_DMW3_INIT 0x0 + /* Performance Counter registers */ #define LOONGARCH_CSR_PERFCTRL0 0x200 /* 32 perf event 0 config */ #define LOONGARCH_CSR_PERFCNTR0 0x201 /* 64 perf event 0 count value */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index d9eafd3ee3d1e..66736837085b6 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -38,6 +38,17 @@ cfi_restore \reg \offset \docfi .endm + .macro SETUP_DMWINS temp + li.d \temp, CSR_DMW0_INIT # WUC, PLV0, 0x8000 xxxx xxxx xxxx + csrwr \temp, LOONGARCH_CSR_DMWIN0 + li.d \temp, CSR_DMW1_INIT # CAC, PLV0, 0x9000 xxxx xxxx xxxx + csrwr \temp, LOONGARCH_CSR_DMWIN1 + li.d \temp, CSR_DMW2_INIT # WUC, PLV0, 0xa000 xxxx xxxx xxxx + csrwr \temp, LOONGARCH_CSR_DMWIN2 + li.d \temp, CSR_DMW3_INIT # 0x0, unused + csrwr \temp, LOONGARCH_CSR_DMWIN3 + .endm + /* Jump to the runtime virtual address. */ .macro JUMP_VIRT_ADDR temp1 temp2 li.d \temp1, CACHE_BASE diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 4677ea8fa8e98..fdb831dc64df2 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -44,11 +44,7 @@ SYM_DATA(kernel_fsize, .long _kernel_fsize); SYM_CODE_START(kernel_entry) # kernel entry point /* Config direct window and set PG */ - li.d t0, CSR_DMW0_INIT # UC, PLV0, 0x8000 xxxx xxxx xxxx - csrwr t0, LOONGARCH_CSR_DMWIN0 - li.d t0, CSR_DMW1_INIT # CA, PLV0, 0x9000 xxxx xxxx xxxx - csrwr t0, LOONGARCH_CSR_DMWIN1 - + SETUP_DMWINS t0 JUMP_VIRT_ADDR t0, t1 /* Enable PG */ @@ -124,11 +120,7 @@ SYM_CODE_END(kernel_entry) * function after setting up the stack and tp registers. */ SYM_CODE_START(smpboot_entry) - li.d t0, CSR_DMW0_INIT # UC, PLV0 - csrwr t0, LOONGARCH_CSR_DMWIN0 - li.d t0, CSR_DMW1_INIT # CA, PLV0 - csrwr t0, LOONGARCH_CSR_DMWIN1 - + SETUP_DMWINS t0 JUMP_VIRT_ADDR t0, t1 #ifdef CONFIG_PAGE_SIZE_4KB diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S index e2fc3b4e31f00..c28ad52b7bafb 100644 --- a/arch/loongarch/power/suspend_asm.S +++ b/arch/loongarch/power/suspend_asm.S @@ -73,11 +73,7 @@ SYM_FUNC_START(loongarch_suspend_enter) * Reload all of the registers and return. */ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL) - li.d t0, CSR_DMW0_INIT # UC, PLV0 - csrwr t0, LOONGARCH_CSR_DMWIN0 - li.d t0, CSR_DMW1_INIT # CA, PLV0 - csrwr t0, LOONGARCH_CSR_DMWIN1 - + SETUP_DMWINS t0 JUMP_VIRT_ADDR t0, t1 /* Enable PG */ diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c index e0ae9c8550c32..8bea96492ef21 100644 --- a/drivers/firmware/efi/libstub/loongarch.c +++ b/drivers/firmware/efi/libstub/loongarch.c @@ -92,6 +92,8 @@ efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, /* Config Direct Mapping */ csr_write64(CSR_DMW0_INIT, LOONGARCH_CSR_DMWIN0); csr_write64(CSR_DMW1_INIT, LOONGARCH_CSR_DMWIN1); + csr_write64(CSR_DMW2_INIT, LOONGARCH_CSR_DMWIN2); + csr_write64(CSR_DMW3_INIT, LOONGARCH_CSR_DMWIN3); real_kernel_entry = (void *)kernel_entry_address(kernel_addr, image); From 484892e0edfc92784195e8b12ab1606fd36e1f08 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:43:07 +0800 Subject: [PATCH 04/19] LoongArch: Remove superfluous flush_dcache_page() definition commit 82bf60a6fed806d57e284a1fb40dbc1ad5097611 upstream. LoongArch doesn't have cache aliases, so flush_dcache_page() is a no-op. There is a generic implementation for this case in include/asm-generic/ cacheflush.h. So remove the superfluous flush_dcache_page() definition, which also silences such build warnings: In file included from crypto/scompress.c:12: include/crypto/scatterwalk.h: In function 'scatterwalk_pagedone': include/crypto/scatterwalk.h:76:30: warning: variable 'page' set but not used [-Wunused-but-set-variable] 76 | struct page *page; | ^~~~ crypto/scompress.c: In function 'scomp_acomp_comp_decomp': >> crypto/scompress.c:174:38: warning: unused variable 'dst_page' [-Wunused-variable] 174 | struct page *dst_page = sg_page(req->dst); | Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403091614.NeUw5zcv-lkp@intel.com/ Suggested-by: Barry Song Acked-by: Barry Song Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou --- arch/loongarch/include/asm/cacheflush.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index 80bd74106985a..f8754d08a31ab 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ b/arch/loongarch/include/asm/cacheflush.h @@ -37,8 +37,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 - #define flush_cache_all() do { } while (0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_dup_mm(mm) do { } while (0) @@ -47,7 +45,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) #define flush_icache_user_page(vma, page, addr, len) do { } while (0) -#define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) From 0e7f5c1f231a42e8fd3c4edf6f0eb69d827de997 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:44:05 +0800 Subject: [PATCH 05/19] LoongArch: Use accessors to page table entries instead of direct dereference commit 4574815abf43e2bf05643e1b3f7a2e5d6df894f0 upstream. As very well explained in commit 20a004e7b017cce282 ("arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing page tables"), an architecture whose page table walker can modify the PTE in parallel must use READ_ONCE()/ WRITE_ONCE() macro to avoid any compiler transformation. So apply that to LoongArch which is such an architecture, in order to avoid potential problems. Similar to commit edf955647269422e ("riscv: Use accessors to page table entries instead of direct dereference"). Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou --- arch/loongarch/include/asm/hugetlb.h | 4 +-- arch/loongarch/include/asm/kfence.h | 6 ++-- arch/loongarch/include/asm/pgtable.h | 48 +++++++++++++++++----------- arch/loongarch/kvm/mmu.c | 8 ++--- arch/loongarch/mm/hugetlbpage.c | 6 ++-- arch/loongarch/mm/init.c | 10 +++--- arch/loongarch/mm/kasan_init.c | 10 +++--- arch/loongarch/mm/pgtable.c | 2 +- 8 files changed, 52 insertions(+), 42 deletions(-) diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index 427b487fbfd65..376c0708e2979 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -44,7 +44,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t clear; - pte_t pte = *ptep; + pte_t pte = ptep_get(ptep); pte_val(clear) = (unsigned long)invalid_pte_table; set_pte_at(mm, addr, ptep, clear); @@ -75,7 +75,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t pte, int dirty) { - int changed = !pte_same(*ptep, pte); + int changed = !pte_same(ptep_get(ptep), pte); if (changed) { set_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 6c82aea1c9939..2835b41d2a848 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -43,13 +43,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *pte = virt_to_kpte(addr); - if (WARN_ON(!pte) || pte_none(*pte)) + if (WARN_ON(!pte) || pte_none(ptep_get(pte))) return false; if (protect) - set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~(_PAGE_VALID | _PAGE_PRESENT))); else - set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) | (_PAGE_VALID | _PAGE_PRESENT))); preempt_disable(); local_flush_tlb_one(addr); diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 29d9b12298bc8..2df497d2f864f 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -106,6 +106,9 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define KFENCE_AREA_START (VMEMMAP_END + 1) #define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) +#define ptep_get(ptep) READ_ONCE(*(ptep)) +#define pmdp_get(pmdp) READ_ONCE(*(pmdp)) + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #ifndef __PAGETABLE_PMD_FOLDED @@ -147,11 +150,6 @@ static inline int p4d_present(p4d_t p4d) return p4d_val(p4d) != (unsigned long)invalid_pud_table; } -static inline void p4d_clear(p4d_t *p4dp) -{ - p4d_val(*p4dp) = (unsigned long)invalid_pud_table; -} - static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)p4d_val(p4d); @@ -159,7 +157,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) { - *p4d = p4dval; + WRITE_ONCE(*p4d, p4dval); +} + +static inline void p4d_clear(p4d_t *p4dp) +{ + set_p4d(p4dp, __p4d((unsigned long)invalid_pud_table)); } #define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) @@ -193,17 +196,20 @@ static inline int pud_present(pud_t pud) return pud_val(pud) != (unsigned long)invalid_pmd_table; } -static inline void pud_clear(pud_t *pudp) +static inline pmd_t *pud_pgtable(pud_t pud) { - pud_val(*pudp) = ((unsigned long)invalid_pmd_table); + return (pmd_t *)pud_val(pud); } -static inline pmd_t *pud_pgtable(pud_t pud) +static inline void set_pud(pud_t *pud, pud_t pudval) { - return (pmd_t *)pud_val(pud); + WRITE_ONCE(*pud, pudval); } -#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud((unsigned long)invalid_pmd_table)); +} #define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) @@ -231,12 +237,15 @@ static inline int pmd_present(pmd_t pmd) return pmd_val(pmd) != (unsigned long)invalid_pte_table; } -static inline void pmd_clear(pmd_t *pmdp) +static inline void set_pmd(pmd_t *pmd, pmd_t pmdval) { - pmd_val(*pmdp) = ((unsigned long)invalid_pte_table); + WRITE_ONCE(*pmd, pmdval); } -#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, __pmd((unsigned long)invalid_pte_table)); +} #define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) @@ -314,7 +323,8 @@ extern void paging_init(void); static inline void set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + WRITE_ONCE(*ptep, pteval); + if (pte_val(pteval) & _PAGE_GLOBAL) { pte_t *buddy = ptep_buddy(ptep); /* @@ -341,8 +351,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); #else /* !CONFIG_SMP */ - if (pte_none(*buddy)) - pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; + if (pte_none(ptep_get(buddy))) + WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); #endif /* CONFIG_SMP */ } } @@ -350,7 +360,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* Preserve global status for the pair */ - if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL) + if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL) set_pte(ptep, __pte(_PAGE_GLOBAL)); else set_pte(ptep, __pte(0)); @@ -591,7 +601,7 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t old = *pmdp; + pmd_t old = pmdp_get(pmdp); pmd_clear(pmdp); diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 915f175278931..3a0bd3486c86f 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -727,19 +727,19 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ - pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); + pgd = pgdp_get(pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; - p4d = READ_ONCE(*p4d_offset(&pgd, hva)); + p4d = p4dp_get(p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; - pud = READ_ONCE(*pud_offset(&p4d, hva)); + pud = pudp_get(pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; - pmd = READ_ONCE(*pmd_offset(&pud, hva)); + pmd = pmdp_get(pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index 1e76fcb83093d..62ddcea0aa146 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -39,11 +39,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { + if (pgd_present(pgdp_get(pgd))) { p4d = p4d_offset(pgd, addr); - if (p4d_present(*p4d)) { + if (p4d_present(p4dp_get(p4d))) { pud = pud_offset(p4d, addr); - if (pud_present(*pud)) + if (pud_present(pudp_get(pud))) pmd = pmd_offset(pud, addr); } } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 4dd53427f6578..71fa4b773eb3c 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -140,7 +140,7 @@ void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, unsigned long next) { - int huge = pmd_val(*pmd) & _PAGE_HUGE; + int huge = pmd_val(pmdp_get(pmd)) & _PAGE_HUGE; if (huge) vmemmap_verify((pte_t *)pmd, node, addr, next); @@ -172,7 +172,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pud) panic("%s: Failed to allocate memory\n", __func__); @@ -183,7 +183,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pmd) panic("%s: Failed to allocate memory\n", __func__); @@ -194,7 +194,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { + if (!pmd_present(pmdp_get(pmd))) { pte_t *pte; pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); @@ -215,7 +215,7 @@ void __init __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); ptep = populate_kernel_pte(addr); - if (!pte_none(*ptep)) { + if (!pte_none(ptep_get(ptep))) { pte_ERROR(*ptep); return; } diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index 082cb2a6f1ef2..7277b7583e1b7 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -112,7 +112,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) { - if (__pmd_none(early, READ_ONCE(*pmdp))) { + if (__pmd_none(early, pmdp_get(pmdp))) { phys_addr_t pte_phys = early ? __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node); if (!early) @@ -125,7 +125,7 @@ static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) { - if (__pud_none(early, READ_ONCE(*pudp))) { + if (__pud_none(early, pudp_get(pudp))) { phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); if (!early) @@ -138,7 +138,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early) { - if (__p4d_none(early, READ_ONCE(*p4dp))) { + if (__p4d_none(early, p4dp_get(p4dp))) { phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); if (!early) @@ -174,7 +174,7 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, : kasan_alloc_zeroed_page(node); next = addr + PAGE_SIZE; set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); - } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); + } while (ptep++, addr = next, addr != end && __pte_none(early, ptep_get(ptep))); } static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, @@ -186,7 +186,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, do { next = pmd_addr_end(addr, end); kasan_pte_populate(pmdp, addr, next, node, early); - } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, pmdp_get(pmdp))); } static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 2aae72e638713..6a038a27373aa 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -128,7 +128,7 @@ pmd_t mk_pmd(struct page *page, pgprot_t prot) void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); flush_tlb_all(); } From b2fa48cb2cf6acba40acbe2af8220132a74c3202 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:45:09 +0800 Subject: [PATCH 06/19] LoongArch: Improve hardware page table walker commit f93f67d06b1023313ef1662eac490e29c025c030 upstream. LoongArch has similar problems explained in commit 7f0b1bf04511348995d6 ("arm64: Fix barriers used for page table modifications"), when hardware page table walker (PTW) enabled, speculative accesses may cause spurious page fault in kernel space. Theoretically, in order to completely avoid spurious page fault we need a "dbar + ibar" pair between the page table modifications and the subsequent memory accesses using the corresponding virtual address. But "ibar" is too heavy for performace, so we only use a "dbar 0b11000" in set_pte(). And let spurious_fault() filter the rest rare spurious page faults which should be avoided by "ibar". Besides, we replace the llsc loop with amo in set_pte() which has better performace, and refactor mmu_context.h to 1) avoid any load/store/branch instructions between the writing of CSR.ASID & CSR.PGDL, 2) ensure flush tlb operation is after updating ASID. Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou --- arch/loongarch/include/asm/atomic.h | 2 ++ arch/loongarch/include/asm/mmu_context.h | 35 +++++++++++++++----- arch/loongarch/include/asm/pgtable.h | 32 ++++++++---------- arch/loongarch/mm/fault.c | 41 ++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 27 deletions(-) diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index e27f0c72d3242..2143202cb380f 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -15,6 +15,7 @@ #define __LL "ll.w " #define __SC "sc.w " #define __AMADD "amadd.w " +#define __AMOR "amor.w " #define __AMAND_DB "amand_db.w " #define __AMOR_DB "amor_db.w " #define __AMXOR_DB "amxor_db.w " @@ -22,6 +23,7 @@ #define __LL "ll.d " #define __SC "sc.d " #define __AMADD "amadd.d " +#define __AMOR "amor.d " #define __AMAND_DB "amand_db.d " #define __AMOR_DB "amor_db.d " #define __AMXOR_DB "amxor_db.d " diff --git a/arch/loongarch/include/asm/mmu_context.h b/arch/loongarch/include/asm/mmu_context.h index 9f97c3453b9ce..304363bd39352 100644 --- a/arch/loongarch/include/asm/mmu_context.h +++ b/arch/loongarch/include/asm/mmu_context.h @@ -49,12 +49,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) /* Normal, classic get_new_mmu_context */ static inline void -get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) +get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, bool *need_flush) { u64 asid = asid_cache(cpu); if (!((++asid) & cpu_asid_mask(&cpu_data[cpu]))) - local_flush_tlb_user(); /* start new asid cycle */ + *need_flush = true; /* start new asid cycle */ cpu_context(cpu, mm) = asid_cache(cpu) = asid; } @@ -74,21 +74,34 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) return 0; } +static inline void atomic_update_pgd_asid(unsigned long asid, unsigned long pgdl) +{ + __asm__ __volatile__( + "csrwr %[pgdl_val], %[pgdl_reg] \n\t" + "csrwr %[asid_val], %[asid_reg] \n\t" + : [asid_val] "+r" (asid), [pgdl_val] "+r" (pgdl) + : [asid_reg] "i" (LOONGARCH_CSR_ASID), [pgdl_reg] "i" (LOONGARCH_CSR_PGDL) + : "memory" + ); +} + static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + bool need_flush = false; unsigned int cpu = smp_processor_id(); /* Check if our ASID is of an older version and thus invalid */ if (!asid_valid(next, cpu)) - get_new_mmu_context(next, cpu); - - write_csr_asid(cpu_asid(cpu, next)); + get_new_mmu_context(next, cpu, &need_flush); if (next != &init_mm) - csr_write64((unsigned long)next->pgd, LOONGARCH_CSR_PGDL); + atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)next->pgd); else - csr_write64((unsigned long)invalid_pg_dir, LOONGARCH_CSR_PGDL); + atomic_update_pgd_asid(cpu_asid(cpu, next), (unsigned long)invalid_pg_dir); + + if (need_flush) + local_flush_tlb_user(); /* Flush tlb after update ASID */ /* * Mark current->active_mm as not "active" anymore. @@ -135,9 +148,15 @@ drop_mmu_context(struct mm_struct *mm, unsigned int cpu) asid = read_csr_asid() & cpu_asid_mask(¤t_cpu_data); if (asid == cpu_asid(cpu, mm)) { + bool need_flush = false; + if (!current->mm || (current->mm == mm)) { - get_new_mmu_context(mm, cpu); + get_new_mmu_context(mm, cpu, &need_flush); + write_csr_asid(cpu_asid(cpu, mm)); + if (need_flush) + local_flush_tlb_user(); /* Flush tlb after update ASID */ + goto out; } } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 2df497d2f864f..0cfcfaf2ae507 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -331,29 +331,23 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ + if (pte_none(ptep_get(buddy))) { #ifdef CONFIG_SMP - /* - * For SMP, multiple CPUs can race, so we need to do - * this atomically. - */ - unsigned long page_global = _PAGE_GLOBAL; - unsigned long tmp; - - __asm__ __volatile__ ( - "1:" __LL "%[tmp], %[buddy] \n" - " bnez %[tmp], 2f \n" - " or %[tmp], %[tmp], %[global] \n" - __SC "%[tmp], %[buddy] \n" - " beqz %[tmp], 1b \n" - " nop \n" - "2: \n" - __WEAK_LLSC_MB - : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) - : [global] "r" (page_global)); + /* + * For SMP, multiple CPUs can race, so we need + * to do this atomically. + */ + __asm__ __volatile__( + __AMOR "$zero, %[global], %[buddy] \n" + : [buddy] "+ZB" (buddy->pte) + : [global] "r" (_PAGE_GLOBAL) + : "memory"); + + DBAR(0b11000); /* o_wrw = 0b11000 */ #else /* !CONFIG_SMP */ - if (pte_none(ptep_get(buddy))) WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); #endif /* CONFIG_SMP */ + } } } diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 97b40defde060..deefd9617d008 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -31,11 +31,52 @@ int show_unhandled_signals = 1; +static int __kprobes spurious_fault(unsigned long write, unsigned long address) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (!(address & __UA_LIMIT)) + return 0; + + pgd = pgd_offset_k(address); + if (!pgd_present(pgdp_get(pgd))) + return 0; + + p4d = p4d_offset(pgd, address); + if (!p4d_present(p4dp_get(p4d))) + return 0; + + pud = pud_offset(p4d, address); + if (!pud_present(pudp_get(pud))) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(pmdp_get(pmd))) + return 0; + + if (pmd_leaf(*pmd)) { + return write ? pmd_write(pmdp_get(pmd)) : 1; + } else { + pte = pte_offset_kernel(pmd, address); + if (!pte_present(ptep_get(pte))) + return 0; + + return write ? pte_write(ptep_get(pte)) : 1; + } +} + static void __kprobes no_context(struct pt_regs *regs, unsigned long write, unsigned long address) { const int field = sizeof(unsigned long) * 2; + if (spurious_fault(write, address)) + return; + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; From 7c890437b613176ae145c9db508001b25a63238b Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:45:45 +0800 Subject: [PATCH 07/19] LoongArch: Set initial pte entry with PAGE_GLOBAL for kernel space commit d2f8671045b41871053dedaf3035a06ad53d2736 upstream. There are two pages in one TLB entry on LoongArch system. For kernel space, it requires both two pte entries (buddies) with PAGE_GLOBAL bit set, otherwise HW treats it as non-global tlb, there will be potential problems if tlb entry for kernel space is not global. Such as fail to flush kernel tlb with the function local_flush_tlb_kernel_range() which supposed only flush tlb with global bit. Kernel address space areas include percpu, vmalloc, vmemmap, fixmap and kasan areas. For these areas both two consecutive page table entries should be enabled with PAGE_GLOBAL bit. So with function set_pte() and pte_clear(), pte buddy entry is checked and set besides its own pte entry. However it is not atomic operation to set both two pte entries, there is problem with test_vmalloc test case. So function kernel_pte_init() is added to init a pte table when it is created for kernel address space, and the default initial pte value is PAGE_GLOBAL rather than zero at beginning. Then only its own pte entry need update with function set_pte() and pte_clear(), nothing to do with the pte buddy entry. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou --- arch/loongarch/include/asm/pgalloc.h | 11 +++++++++ arch/loongarch/include/asm/pgtable.h | 35 ++++++---------------------- arch/loongarch/mm/init.c | 2 ++ arch/loongarch/mm/pgtable.c | 20 ++++++++++++++++ include/linux/mm.h | 3 ++- mm/kasan/init.c | 8 ++++++- mm/sparse-vmemmap.c | 5 ++++ 7 files changed, 54 insertions(+), 30 deletions(-) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 79470f0b4f1d8..c9f9895f237d9 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -10,6 +10,7 @@ #define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL #include static inline void pmd_populate_kernel(struct mm_struct *mm, @@ -44,6 +45,16 @@ extern void pagetable_init(void); extern pgd_t *pgd_alloc(struct mm_struct *mm); +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + pte_t *pte = __pte_alloc_one_kernel(mm); + + if (pte) + kernel_pte_init(pte); + + return pte; +} + #define __pte_free_tlb(tlb, pte, address) \ do { \ pagetable_pte_dtor(page_ptdesc(pte)); \ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 0cfcfaf2ae507..1bda6522f13a8 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -269,6 +269,7 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm extern void pgd_init(void *addr); extern void pud_init(void *addr); extern void pmd_init(void *addr); +extern void kernel_pte_init(void *addr); /* * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that @@ -325,39 +326,17 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) { WRITE_ONCE(*ptep, pteval); - if (pte_val(pteval) & _PAGE_GLOBAL) { - pte_t *buddy = ptep_buddy(ptep); - /* - * Make sure the buddy is global too (if it's !none, - * it better already be global) - */ - if (pte_none(ptep_get(buddy))) { #ifdef CONFIG_SMP - /* - * For SMP, multiple CPUs can race, so we need - * to do this atomically. - */ - __asm__ __volatile__( - __AMOR "$zero, %[global], %[buddy] \n" - : [buddy] "+ZB" (buddy->pte) - : [global] "r" (_PAGE_GLOBAL) - : "memory"); - - DBAR(0b11000); /* o_wrw = 0b11000 */ -#else /* !CONFIG_SMP */ - WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); -#endif /* CONFIG_SMP */ - } - } + if (pte_val(pteval) & _PAGE_GLOBAL) + DBAR(0b11000); /* o_wrw = 0b11000 */ +#endif } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - /* Preserve global status for the pair */ - if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL) - set_pte(ptep, __pte(_PAGE_GLOBAL)); - else - set_pte(ptep, __pte(0)); + pte_t pte = ptep_get(ptep); + pte_val(pte) &= _PAGE_GLOBAL; + set_pte(ptep, pte); } #define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 71fa4b773eb3c..14f74a55baaea 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -200,7 +200,9 @@ pte_t * __init populate_kernel_pte(unsigned long addr) pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pte) panic("%s: Failed to allocate memory\n", __func__); + pmd_populate_kernel(&init_mm, pmd, pte); + kernel_pte_init(pte); } return pte_offset_kernel(pmd, addr); diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 6a038a27373aa..3b2fbe74d7d9b 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -116,6 +116,26 @@ void pud_init(void *addr) EXPORT_SYMBOL_GPL(pud_init); #endif +void kernel_pte_init(void *addr) +{ + unsigned long *p, *end; + + p = (unsigned long *)addr; + end = p + PTRS_PER_PTE; + + do { + p[0] = _PAGE_GLOBAL; + p[1] = _PAGE_GLOBAL; + p[2] = _PAGE_GLOBAL; + p[3] = _PAGE_GLOBAL; + p[4] = _PAGE_GLOBAL; + p += 8; + p[-3] = _PAGE_GLOBAL; + p[-2] = _PAGE_GLOBAL; + p[-1] = _PAGE_GLOBAL; + } while (p != end); +} + pmd_t mk_pmd(struct page *page, pgprot_t prot) { pmd_t pmd; diff --git a/include/linux/mm.h b/include/linux/mm.h index 297718269834d..c111ee69f4864 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3773,8 +3773,9 @@ void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); -void pmd_init(void *addr); void pud_init(void *addr); +void pmd_init(void *addr); +void kernel_pte_init(void *addr); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); diff --git a/mm/kasan/init.c b/mm/kasan/init.c index 89895f38f7224..ac607c306292f 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -106,6 +106,10 @@ static void __ref zero_pte_populate(pmd_t *pmd, unsigned long addr, } } +void __weak __meminit kernel_pte_init(void *addr) +{ +} + static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end) { @@ -126,8 +130,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr, if (slab_is_available()) p = pte_alloc_one_kernel(&init_mm); - else + else { p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); + kernel_pte_init(p); + } if (!p) return -ENOMEM; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a2cbe44c48e10..2628fc02be08b 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -184,6 +184,10 @@ static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node) return p; } +void __weak __meminit kernel_pte_init(void *addr) +{ +} + pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) { pmd_t *pmd = pmd_offset(pud, addr); @@ -191,6 +195,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; + kernel_pte_init(p); pmd_populate_kernel(&init_mm, pmd, p); } return pmd; From 1c0610011e4efbdf76ead86e9ef428ee219ace79 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:37:52 +0800 Subject: [PATCH 08/19] LoongArch: Add AVEC irqchip support commit ae16f05c928a1336d5d9d19fd805d7bf29c3f0c8 upstream. Introduce the advanced extended interrupt controllers (AVECINTC). This feature will allow each core to have 256 independent interrupt vectors and MSI interrupts can be independently routed to any vector on any CPU. Co-developed-by: Jianmin Lv Signed-off-by: Jianmin Lv Co-developed-by: Liupu Wang Signed-off-by: Liupu Wang Co-developed-by: Huacai Chen Signed-off-by: Huacai Chen Signed-off-by: Tianyang Zhang Signed-off-by: Binbin Zhou --- .../arch/loongarch/irq-chip-model.rst | 32 ++ .../zh_CN/arch/loongarch/irq-chip-model.rst | 32 ++ MAINTAINERS | 1 + arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/cpu-features.h | 1 + arch/loongarch/include/asm/cpu.h | 2 + arch/loongarch/include/asm/hardirq.h | 3 +- arch/loongarch/include/asm/irq.h | 29 +- arch/loongarch/include/asm/loongarch.h | 18 +- arch/loongarch/include/asm/smp.h | 2 + arch/loongarch/kernel/cpu-probe.c | 3 +- arch/loongarch/kernel/irq.c | 14 + arch/loongarch/kernel/legacy_boot.h | 9 + arch/loongarch/kernel/smp.c | 6 + drivers/irqchip/Makefile | 2 +- drivers/irqchip/irq-loongarch-avec.c | 452 ++++++++++++++++++ drivers/irqchip/irq-loongarch-cpu.c | 7 +- drivers/irqchip/irq-loongson-eiointc.c | 9 +- drivers/irqchip/irq-loongson-htvec.c | 2 + drivers/irqchip/irq-loongson-liointc.c | 2 + drivers/irqchip/irq-loongson-pch-lpc.c | 2 + drivers/irqchip/irq-loongson-pch-msi.c | 44 +- drivers/irqchip/irq-loongson-pch-pic.c | 2 + drivers/irqchip/irq-loongson.h | 27 ++ include/linux/cpuhotplug.h | 4 + 25 files changed, 676 insertions(+), 30 deletions(-) create mode 100644 drivers/irqchip/irq-loongarch-avec.c create mode 100644 drivers/irqchip/irq-loongson.h diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst index 7988f41923639..6dd48256e39f7 100644 --- a/Documentation/arch/loongarch/irq-chip-model.rst +++ b/Documentation/arch/loongarch/irq-chip-model.rst @@ -85,6 +85,38 @@ to CPUINTC directly:: | Devices | +---------+ +Advanced Extended IRQ model +=========================== + +In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go +to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go +to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts +go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly:: + + +-----+ +-----------------------+ +-------+ + | IPI | --> | CPUINTC | <-- | Timer | + +-----+ +-----------------------+ +-------+ + ^ ^ ^ + | | | + +---------+ +----------+ +---------+ +-------+ + | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs | + +---------+ +----------+ +---------+ +-------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + | | | + +---------+ +---------+ +---------+ + | Devices | | PCH-LPC | | Devices | + +---------+ +---------+ +---------+ + ^ + | + +---------+ + | Devices | + +---------+ + ACPI-related definitions ======================== diff --git a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst index f1e9ab18206c3..472761938682c 100644 --- a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst +++ b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst @@ -87,6 +87,38 @@ PCH-LPC/PCH-MSI,然后被EIOINTC统一收集,再直接到达CPUINTC:: | Devices | +---------+ +高级扩展IRQ模型 +=============== + +在这种模型里面,IPI(Inter-Processor Interrupt)和CPU本地时钟中断直接发送到CPUINTC, +CPU串口(UARTs)中断发送到LIOINTC,PCH-MSI中断发送到AVECINTC,而后通过AVECINTC直接 +送达CPUINTC,而其他所有设备的中断则分别发送到所连接的PCH-PIC/PCH-LPC,然后由EIOINTC +统一收集,再直接到达CPUINTC:: + + +-----+ +-----------------------+ +-------+ + | IPI | --> | CPUINTC | <-- | Timer | + +-----+ +-----------------------+ +-------+ + ^ ^ ^ + | | | + +---------+ +----------+ +---------+ +-------+ + | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs | + +---------+ +----------+ +---------+ +-------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + | | | + +---------+ +---------+ +---------+ + | Devices | | PCH-LPC | | Devices | + +---------+ +---------+ +---------+ + ^ + | + +---------+ + | Devices | + +---------+ + ACPI相关的定义 ============== diff --git a/MAINTAINERS b/MAINTAINERS index 687080ab63a9b..c8514eba5256e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12390,6 +12390,7 @@ F: Documentation/arch/loongarch/ F: Documentation/translations/zh_CN/arch/loongarch/ F: arch/loongarch/ F: drivers/*/*loongarch* +F: drivers/*/*loongson* LOONGSON GPIO DRIVER M: Yinbo Zhu diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 2b227f287e622..50a3d95117598 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -75,6 +75,7 @@ config LOONGARCH select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY select GENERIC_IOREMAP if !ARCH_IOREMAP + select GENERIC_IRQ_MATRIX_ALLOCATOR select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 2eafe6a6aca81..16a716f88a5ca 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -65,5 +65,6 @@ #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) +#define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 48b9f7168bcca..843f9c4ec9807 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -99,6 +99,7 @@ enum cpu_type_enum { #define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ #define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ #define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */ +#define CPU_FEATURE_AVECINT 27 /* CPU has avec interrupt */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -127,5 +128,6 @@ enum cpu_type_enum { #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) +#define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h index b26d596a73aa2..5f70cb77b54da 100644 --- a/arch/loongarch/include/asm/hardirq.h +++ b/arch/loongarch/include/asm/hardirq.h @@ -15,8 +15,9 @@ extern void ack_bad_irq(unsigned int irq); enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNCTION, + IPI_CLEAR_VECTOR, }; -#define NR_IPI 2 +#define NR_IPI 3 typedef struct { unsigned int ipi_irqs[NR_IPI]; diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index 00101b6d601e5..2342654b46212 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -39,6 +39,17 @@ void spurious_interrupt(void); #define NR_IRQS_LEGACY 16 +/* + * 256 Vectors Mapping for AVECINTC: + * + * 0 - 15: Mapping classic IPs, e.g. IP0-12. + * 16 - 255: Mapping vectors for external IRQ. + * + */ +#define NR_VECTORS 256 +#define NR_LEGACY_VECTORS 16 +#define IRQ_MATRIX_BITS NR_VECTORS + #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu); @@ -65,7 +76,7 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS]; #define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15) #define LOONGSON_CPU_IRQ_BASE 16 -#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14) +#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 15) #define LOONGSON_PCH_IRQ_BASE 64 #define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47) @@ -88,20 +99,8 @@ struct acpi_madt_bio_pic; struct acpi_madt_msi_pic; struct acpi_madt_lpc_pic; -int liointc_acpi_init(struct irq_domain *parent, - struct acpi_madt_lio_pic *acpi_liointc); -int eiointc_acpi_init(struct irq_domain *parent, - struct acpi_madt_eio_pic *acpi_eiointc); - -int htvec_acpi_init(struct irq_domain *parent, - struct acpi_madt_ht_pic *acpi_htvec); -int pch_lpc_acpi_init(struct irq_domain *parent, - struct acpi_madt_lpc_pic *acpi_pchlpc); -int pch_msi_acpi_init(struct irq_domain *parent, - struct acpi_madt_msi_pic *acpi_pchmsi); -int pch_pic_acpi_init(struct irq_domain *parent, - struct acpi_madt_bio_pic *acpi_pchpic); -int find_pch_pic(u32 gsi); +void complete_irq_moving(void); + struct fwnode_handle *get_pch_msi_handle(int pci_segment); extern struct acpi_madt_lio_pic *acpi_liointc; diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 74875d470f5c6..6f44104e9ddec 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -253,8 +253,8 @@ #define CSR_ESTAT_EXC_WIDTH 6 #define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT) #define CSR_ESTAT_IS_SHIFT 0 -#define CSR_ESTAT_IS_WIDTH 14 -#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT) +#define CSR_ESTAT_IS_WIDTH 15 +#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT) #define LOONGARCH_CSR_ERA 0x6 /* Exception return address */ @@ -649,6 +649,13 @@ #define LOONGARCH_CSR_CTAG 0x98 /* TagLo + TagHi */ +#define LOONGARCH_CSR_ISR0 0xa0 +#define LOONGARCH_CSR_ISR1 0xa1 +#define LOONGARCH_CSR_ISR2 0xa2 +#define LOONGARCH_CSR_ISR3 0xa3 + +#define LOONGARCH_CSR_IRR 0xa4 + #define LOONGARCH_CSR_PRID 0xc0 /* Shadow MCSR : 0xc0 ~ 0xff */ @@ -1071,7 +1078,7 @@ /* * CSR_ECFG IM */ -#define ECFG0_IM 0x00001fff +#define ECFG0_IM 0x00005fff #define ECFGB_SIP0 0 #define ECFGF_SIP0 (_ULCAST_(1) << ECFGB_SIP0) #define ECFGB_SIP1 1 @@ -1114,6 +1121,7 @@ #define IOCSRF_EIODECODE BIT_ULL(9) #define IOCSRF_FLATMODE BIT_ULL(10) #define IOCSRF_VM BIT_ULL(11) +#define IOCSRF_AVEC BIT_ULL(15) #define LOONGARCH_IOCSR_VENDOR 0x10 @@ -1124,6 +1132,7 @@ #define LOONGARCH_IOCSR_MISC_FUNC 0x420 #define IOCSR_MISC_FUNC_TIMER_RESET BIT_ULL(21) #define IOCSR_MISC_FUNC_EXT_IOI_EN BIT_ULL(48) +#define IOCSR_MISC_FUNC_AVEC_EN BIT_ULL(51) #define LOONGARCH_IOCSR_CPUTEMP 0x428 @@ -1445,9 +1454,10 @@ __BUILD_CSR_OP(tlbidx) #define INT_TI 11 /* Timer */ #define INT_IPI 12 #define INT_NMI 13 +#define INT_AVEC 14 /* ExcCodes corresponding to interrupts */ -#define EXCCODE_INT_NUM (INT_NMI + 1) +#define EXCCODE_INT_NUM (INT_AVEC + 1) #define EXCCODE_INT_START 64 #define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1) diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index b6c09ebc194a0..f62e025dda101 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -67,9 +67,11 @@ extern int __cpu_logical_map[NR_CPUS]; #define ACTION_BOOT_CPU 0 #define ACTION_RESCHEDULE 1 #define ACTION_CALL_FUNCTION 2 +#define ACTION_CLEAR_VECTOR 3 #define SMP_BOOT_CPU BIT(ACTION_BOOT_CPU) #define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE) #define SMP_CALL_FUNCTION BIT(ACTION_CALL_FUNCTION) +#define SMP_CLEAR_VECTOR BIT(ACTION_CLEAR_VECTOR) struct seq_file; diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 55320813ee081..14f0449f54520 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -106,7 +106,6 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) elf_hwcap |= HWCAP_LOONGARCH_CRC32; } - config = read_cpucfg(LOONGARCH_CPUCFG2); if (config & CPUCFG2_LAM) { c->options |= LOONGARCH_CPU_LAM; @@ -174,6 +173,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_FLATMODE; if (config & IOCSRF_EIODECODE) c->options |= LOONGARCH_CPU_EIODECODE; + if (config & IOCSRF_AVEC) + c->options |= LOONGARCH_CPU_AVECINT; if (config & IOCSRF_VM) c->options |= LOONGARCH_CPU_HYPERVISOR; diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 6138168b4df1f..2f0d6d0ac8b59 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -87,6 +87,20 @@ static void __init init_vec_parent_group(void) acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse); } +int __init arch_probe_nr_irqs(void) +{ + int nr_io_pics = bitmap_weight(&loongson_sysconf.cores_io_master, + sizeof(loongson_sysconf.cores_io_master)*8); + + if (!cpu_has_avecint) + nr_irqs = (64 + NR_VECTORS * nr_io_pics); + else + nr_irqs = (64 + NR_VECTORS * (nr_cpu_ids + nr_io_pics)); + + return NR_IRQS_LEGACY; +} + + void __init init_IRQ(void) { int i; diff --git a/arch/loongarch/kernel/legacy_boot.h b/arch/loongarch/kernel/legacy_boot.h index 6d3a36ebaab71..61e9b08dad639 100644 --- a/arch/loongarch/kernel/legacy_boot.h +++ b/arch/loongarch/kernel/legacy_boot.h @@ -15,4 +15,13 @@ extern void bpi_memblock_init(unsigned long *p_max_low_pfn); extern void bpi_init_node_memblock(void (*p_add_numamem_region)(u64 start, u64 end, u32 type)); extern int loongarch_have_legacy_bpi(void); +extern int liointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lio_pic *acpi_liointc); +extern int eiointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_eio_pic *acpi_eiointc); +extern int htvec_acpi_init(struct irq_domain *parent, + struct acpi_madt_ht_pic *acpi_htvec); +extern int pch_lpc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lpc_pic *acpi_pchlpc); + #endif /* __LEGACY_BOOT_H_ */ diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 09ed129c9a526..78b42d06bc06f 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -72,6 +72,7 @@ static DEFINE_PER_CPU(int, cpu_state); static const char *ipi_types[NR_IPI] __tracepoint_string = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNCTION] = "Function call interrupts", + [IPI_CLEAR_VECTOR] = "Clear vector interrupts", }; void show_ipi_list(struct seq_file *p, int prec) @@ -246,6 +247,11 @@ static irqreturn_t loongson_ipi_interrupt(int irq, void *dev) per_cpu(irq_stat, cpu).ipi_irqs[IPI_CALL_FUNCTION]++; } + if (action & SMP_CLEAR_VECTOR) { + complete_irq_moving(); + per_cpu(irq_stat, cpu).ipi_irqs[IPI_CLEAR_VECTOR]++; + } + return IRQ_HANDLED; } diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 7b04778db31a7..8a72dc73346de 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -105,7 +105,7 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o -obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o +obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o irq-loongarch-avec.o obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c new file mode 100644 index 0000000000000..e8c43b3fd8266 --- /dev/null +++ b/drivers/irqchip/irq-loongarch-avec.c @@ -0,0 +1,452 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2024 Loongson Technologies, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "irq-loongson.h" + +#define VECTORS_PER_REG 64 +#define IRR_VECTOR_MASK 0xffUL +#define IRR_INVALID_MASK 0x80000000UL +#define AVEC_MSG_OFFSET 0x100000 + +#ifdef CONFIG_SMP +struct pending_list { + struct list_head head; +}; + +static struct cpumask intersect_mask; +static DEFINE_PER_CPU(struct pending_list, pending_list); +#endif + +static DEFINE_PER_CPU(struct irq_desc * [NR_VECTORS], irq_map); + +struct avecintc_chip { + raw_spinlock_t lock; + struct fwnode_handle *fwnode; + struct irq_domain *domain; + struct irq_matrix *vector_matrix; + phys_addr_t msi_base_addr; +}; + +static struct avecintc_chip loongarch_avec; + +struct avecintc_data { + struct list_head entry; + unsigned int cpu; + unsigned int vec; + unsigned int prev_cpu; + unsigned int prev_vec; + unsigned int moving; +}; + +static inline void avecintc_ack_irq(struct irq_data *d) +{ +} + +static inline void avecintc_mask_irq(struct irq_data *d) +{ +} + +static inline void avecintc_unmask_irq(struct irq_data *d) +{ +} + +#ifdef CONFIG_SMP +static inline void pending_list_init(int cpu) +{ + struct pending_list *plist = per_cpu_ptr(&pending_list, cpu); + + INIT_LIST_HEAD(&plist->head); +} + +static void avecintc_sync(struct avecintc_data *adata) +{ + struct pending_list *plist; + + if (cpu_online(adata->prev_cpu)) { + plist = per_cpu_ptr(&pending_list, adata->prev_cpu); + list_add_tail(&adata->entry, &plist->head); + adata->moving = 1; + smp_ops.send_ipi_single(adata->prev_cpu, SMP_CLEAR_VECTOR); + } +} + +static int avecintc_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force) +{ + int cpu, ret, vector; + struct avecintc_data *adata; + + raw_spin_lock(&loongarch_avec.lock); + adata = irq_data_get_irq_chip_data(data); + + if (adata->moving) { + raw_spin_unlock(&loongarch_avec.lock); + return -EBUSY; + } + + if (adata->vec == UINT_MAX) { + raw_spin_unlock(&loongarch_avec.lock); + return -EINVAL; + } + + if (cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest)) { + raw_spin_unlock(&loongarch_avec.lock); + return 0; + } + + cpumask_and(&intersect_mask, dest, cpu_online_mask); + + ret = irq_matrix_alloc(loongarch_avec.vector_matrix, &intersect_mask, false, &cpu); + if (ret < 0) { + raw_spin_unlock(&loongarch_avec.lock); + return ret; + } + + vector = ret; + adata->cpu = cpu; + adata->vec = vector; + per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(data); + avecintc_sync(adata); + + raw_spin_unlock(&loongarch_avec.lock); + irq_data_update_effective_affinity(data, cpumask_of(cpu)); + + return IRQ_SET_MASK_OK; +} + +static int avecintc_cpu_online(unsigned int cpu) +{ + if (!loongarch_avec.vector_matrix) + return 0; + + raw_spin_lock(&loongarch_avec.lock); + + irq_matrix_online(loongarch_avec.vector_matrix); + + pending_list_init(cpu); + + raw_spin_unlock(&loongarch_avec.lock); + + return 0; +} + +static int avecintc_cpu_offline(unsigned int cpu) +{ + struct pending_list *plist = per_cpu_ptr(&pending_list, cpu); + + if (!loongarch_avec.vector_matrix) + return 0; + + raw_spin_lock(&loongarch_avec.lock); + + if (!list_empty(&plist->head)) + pr_warn("CPU#%d vector is busy\n", cpu); + irq_matrix_offline(loongarch_avec.vector_matrix); + + raw_spin_unlock(&loongarch_avec.lock); + + return 0; +} + +void complete_irq_moving(void) +{ + struct pending_list *plist = this_cpu_ptr(&pending_list); + struct avecintc_data *adata, *tdata; + int cpu, vector, bias; + uint64_t isr; + + raw_spin_lock(&loongarch_avec.lock); + + list_for_each_entry_safe(adata, tdata, &plist->head, entry) { + cpu = adata->prev_cpu; + vector = adata->prev_vec; + bias = vector / VECTORS_PER_REG; + switch (bias) { + case 0: + isr = csr_read64(LOONGARCH_CSR_ISR0); + break; + case 1: + isr = csr_read64(LOONGARCH_CSR_ISR1); + break; + case 2: + isr = csr_read64(LOONGARCH_CSR_ISR2); + break; + case 3: + isr = csr_read64(LOONGARCH_CSR_ISR3); + break; + } + + if (isr & (1UL << (vector % VECTORS_PER_REG))) { + smp_ops.send_ipi_single(cpu, SMP_CLEAR_VECTOR); + continue; + } + list_del(&adata->entry); + irq_matrix_free(loongarch_avec.vector_matrix, cpu, vector, false); + this_cpu_write(irq_map[vector], NULL); + adata->moving = 0; + adata->prev_cpu = adata->cpu; + adata->prev_vec = adata->vec; + } + + raw_spin_unlock(&loongarch_avec.lock); +} +#endif + +static void avecintc_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct avecintc_data *adata = irq_data_get_irq_chip_data(d); + + msg->address_hi = 0x0; + msg->address_lo = (loongarch_avec.msi_base_addr | (adata->vec & 0xff) << 4) + | ((cpu_logical_map(adata->cpu & 0xffff)) << 12); + msg->data = 0x0; +} + +static struct irq_chip avec_irq_controller = { + .name = "AVECINTC", + .irq_ack = avecintc_ack_irq, + .irq_mask = avecintc_mask_irq, + .irq_unmask = avecintc_unmask_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = avecintc_set_affinity, +#endif + .irq_compose_msi_msg = avecintc_compose_msi_msg, +}; + +static void avecintc_irq_dispatch(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irq_desc *d; + + chained_irq_enter(chip, desc); + + while (true) { + unsigned long vector = csr_read64(LOONGARCH_CSR_IRR); + + if (vector & IRR_INVALID_MASK) + break; + + vector &= IRR_VECTOR_MASK; + + d = this_cpu_read(irq_map[vector]); + if (d) { + generic_handle_irq_desc(d); + } else { + spurious_interrupt(); + pr_warn("Unexpected IRQ occurs on CPU#%d [vector %ld]\n", + smp_processor_id(), vector); + } + } + + chained_irq_exit(chip, desc); +} + +static int avecintc_alloc_vector(struct irq_data *irqd, struct avecintc_data *adata) +{ + int cpu, ret; + unsigned long flags; + + raw_spin_lock_irqsave(&loongarch_avec.lock, flags); + + ret = irq_matrix_alloc(loongarch_avec.vector_matrix, cpu_online_mask, false, &cpu); + if (ret < 0) { + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags); + return ret; + } + + adata->prev_cpu = adata->cpu = cpu; + adata->prev_vec = adata->vec = ret; + per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(irqd); + + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags); + + return 0; +} + +static int avecintc_domain_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, void *arg) +{ + for (unsigned int i = 0; i < nr_irqs; i++) { + struct irq_data *irqd = irq_domain_get_irq_data(domain, virq + i); + struct avecintc_data *adata = kzalloc(sizeof(*adata), GFP_KERNEL); + int ret; + + if (!adata) + return -ENOMEM; + + ret = avecintc_alloc_vector(irqd, adata); + if (ret < 0) { + kfree(adata); + return ret; + } + + irq_domain_set_info(domain, virq + i, virq + i, &avec_irq_controller, + adata, handle_edge_irq, NULL, NULL); + irqd_set_single_target(irqd); + irqd_set_affinity_on_activate(irqd); + } + + return 0; +} + +static void avecintc_free_vector(struct irq_data *irqd, struct avecintc_data *adata) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&loongarch_avec.lock, flags); + + per_cpu(irq_map, adata->cpu)[adata->vec] = NULL; + irq_matrix_free(loongarch_avec.vector_matrix, adata->cpu, adata->vec, false); + +#ifdef CONFIG_SMP + if (!adata->moving) { + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags); + return; + } + + per_cpu(irq_map, adata->prev_cpu)[adata->prev_vec] = NULL; + irq_matrix_free(loongarch_avec.vector_matrix, adata->prev_cpu, adata->prev_vec, false); + list_del_init(&adata->entry); +#endif + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags); +} + +static void avecintc_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + for (unsigned int i = 0; i < nr_irqs; i++) { + struct irq_data *d = irq_domain_get_irq_data(domain, virq + i); + + if (d) { + struct avecintc_data *adata = irq_data_get_irq_chip_data(d); + + avecintc_free_vector(d, adata); + irq_domain_reset_irq_data(d); + kfree(adata); + } + } +} + +static const struct irq_domain_ops avecintc_domain_ops = { + .alloc = avecintc_domain_alloc, + .free = avecintc_domain_free, +}; + +static int __init irq_matrix_init(void) +{ + loongarch_avec.vector_matrix = irq_alloc_matrix(NR_VECTORS, 0, NR_VECTORS); + if (!loongarch_avec.vector_matrix) + return -ENOMEM; + + for (int i = 0; i < NR_LEGACY_VECTORS; i++) + irq_matrix_assign_system(loongarch_avec.vector_matrix, i, false); + + irq_matrix_online(loongarch_avec.vector_matrix); + + return 0; +} + +static int __init avecintc_init(struct irq_domain *parent) +{ + int ret, parent_irq; + unsigned long value; + + raw_spin_lock_init(&loongarch_avec.lock); + + loongarch_avec.fwnode = irq_domain_alloc_named_fwnode("AVECINTC"); + if (!loongarch_avec.fwnode) { + pr_err("Unable to allocate domain handle\n"); + ret = -ENOMEM; + goto out; + } + + loongarch_avec.domain = irq_domain_create_tree(loongarch_avec.fwnode, + &avecintc_domain_ops, NULL); + if (!loongarch_avec.domain) { + pr_err("Unable to create IRQ domain\n"); + ret = -ENOMEM; + goto out_free_handle; + } + + parent_irq = irq_create_mapping(parent, INT_AVEC); + if (!parent_irq) { + pr_err("Failed to mapping hwirq\n"); + ret = -EINVAL; + goto out_remove_domain; + } + + ret = irq_matrix_init(); + if (ret < 0) { + pr_err("Failed to init irq matrix\n"); + goto out_remove_domain; + } + irq_set_chained_handler_and_data(parent_irq, avecintc_irq_dispatch, NULL); + +#ifdef CONFIG_SMP + pending_list_init(0); + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_AVECINTC_STARTING, + "irqchip/loongarch/avecintc:starting", + avecintc_cpu_online, avecintc_cpu_offline); +#endif + value = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC); + value |= IOCSR_MISC_FUNC_AVEC_EN; + iocsr_write64(value, LOONGARCH_IOCSR_MISC_FUNC); + + return ret; + +out_remove_domain: + irq_domain_remove(loongarch_avec.domain); +out_free_handle: + irq_domain_free_fwnode(loongarch_avec.fwnode); +out: + return ret; +} + +static int __init pch_msi_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_msi_pic *pchmsi_entry = (struct acpi_madt_msi_pic *)header; + + loongarch_avec.msi_base_addr = pchmsi_entry->msg_address - AVEC_MSG_OFFSET; + + return pch_msi_acpi_init_avec(loongarch_avec.domain); +} + +static inline int __init acpi_cascade_irqdomain_init(void) +{ + return acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1); +} + +int __init avecintc_acpi_init(struct irq_domain *parent) +{ + int ret = avecintc_init(parent); + + if (ret < 0) { + pr_err("Failed to init IRQ domain\n"); + return ret; + } + + ret = acpi_cascade_irqdomain_init(); + if (ret < 0) { + pr_err("Failed to init cascade IRQ domain\n"); + return ret; + } + + return ret; +} diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index b35903a06902f..e62dab4c97fcf 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -13,6 +13,8 @@ #include #include +#include "irq-loongson.h" + static struct irq_domain *irq_domain; struct fwnode_handle *cpuintc_handle; @@ -140,7 +142,10 @@ static int __init acpi_cascade_irqdomain_init(void) if (r < 0) return r; - return 0; + if (cpu_has_avecint) + r = avecintc_acpi_init(irq_domain); + + return r; } static int __init cpuintc_acpi_init(union acpi_subtable_headers *header, diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 595b954b9a701..0799a3a72ed85 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -17,6 +17,8 @@ #include #include +#include "irq-loongson.h" + #define EIOINTC_REG_NODEMAP 0x14a0 #define EIOINTC_REG_IPMAP 0x14c0 #define EIOINTC_REG_ENABLE 0x1600 @@ -371,6 +373,9 @@ static int __init acpi_cascade_irqdomain_init(void) if (r < 0) return r; + if (cpu_has_avecint) + return 0; + r = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1); if (r < 0) return r; @@ -418,8 +423,8 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq, if (nr_pics == 1) { register_syscore_ops(&eiointc_syscore_ops); - cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING, - "irqchip/loongarch/intc:starting", + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_EIOINTC_STARTING, + "irqchip/loongarch/eiointc:starting", eiointc_router_init, NULL); } diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c index 0bff728b25e3d..5da02c7ad0b30 100644 --- a/drivers/irqchip/irq-loongson-htvec.c +++ b/drivers/irqchip/irq-loongson-htvec.c @@ -17,6 +17,8 @@ #include #include +#include "irq-loongson.h" + /* Registers */ #define HTVEC_EN_OFF 0x20 #define HTVEC_MAX_PARENT_IRQ 8 diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 0262cbefe9ddb..257aa699ec984 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -22,6 +22,8 @@ #include #endif +#include "irq-loongson.h" + #define LIOINTC_CHIP_IRQ 32 #define LIOINTC_NUM_PARENT 4 #define LIOINTC_NUM_CORES 4 diff --git a/drivers/irqchip/irq-loongson-pch-lpc.c b/drivers/irqchip/irq-loongson-pch-lpc.c index 9b35492fb6be9..2d4c3ec128b8f 100644 --- a/drivers/irqchip/irq-loongson-pch-lpc.c +++ b/drivers/irqchip/irq-loongson-pch-lpc.c @@ -15,6 +15,8 @@ #include #include +#include "irq-loongson.h" + /* Registers */ #define LPC_INT_CTL 0x00 #define LPC_INT_ENA 0x04 diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index dd4d699170f4e..2c9f58536fce4 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -15,6 +15,8 @@ #include #include +#include "irq-loongson.h" + static int nr_pics; struct pch_msi_data { @@ -266,17 +268,17 @@ IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_of_init); #ifdef CONFIG_ACPI struct fwnode_handle *get_pch_msi_handle(int pci_segment) { - int i; + if (cpu_has_avecint) + return pch_msi_handle[0]; - for (i = 0; i < MAX_IO_PICS; i++) { + for (int i = 0; i < MAX_IO_PICS; i++) { if (msi_group[i].pci_segment == pci_segment) return pch_msi_handle[i]; } - return NULL; + return pch_msi_handle[0]; } -int __init pch_msi_acpi_init(struct irq_domain *parent, - struct acpi_madt_msi_pic *acpi_pchmsi) +int __init pch_msi_acpi_init(struct irq_domain *parent, struct acpi_madt_msi_pic *acpi_pchmsi) { int ret; struct fwnode_handle *domain_handle; @@ -289,4 +291,36 @@ int __init pch_msi_acpi_init(struct irq_domain *parent, return ret; } + +static struct irq_chip pch_msi_irq_chip_avec = { + .name = "PCH PCI MSI", + .irq_ack = irq_chip_ack_parent, +}; + +static struct msi_domain_info pch_msi_domain_info_avec = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, + .chip = &pch_msi_irq_chip_avec, +}; + +int __init pch_msi_acpi_init_avec(struct irq_domain *parent) +{ + struct irq_domain *msi_domain; + + if (pch_msi_handle[0]) + return 0; + + pch_msi_handle[0] = parent->fwnode; + irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS); + + msi_domain = pci_msi_create_irq_domain(pch_msi_handle[0], + &pch_msi_domain_info_avec, parent); + if (!msi_domain) { + pr_err("Failed to create PCI MSI domain\n"); + kfree(pch_msi_handle[0]); + return -ENOMEM; + } + + return 0; +} #endif diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index aed6a18a780cc..6ebc866599618 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -17,6 +17,8 @@ #include #include +#include "irq-loongson.h" + /* Registers */ #define PCH_PIC_MASK 0x20 #define PCH_PIC_HTMSI_EN 0x40 diff --git a/drivers/irqchip/irq-loongson.h b/drivers/irqchip/irq-loongson.h new file mode 100644 index 0000000000000..11fa138d1f443 --- /dev/null +++ b/drivers/irqchip/irq-loongson.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef _DRIVERS_IRQCHIP_IRQ_LOONGSON_H +#define _DRIVERS_IRQCHIP_IRQ_LOONGSON_H + +int find_pch_pic(u32 gsi); + +int liointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lio_pic *acpi_liointc); +int eiointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_eio_pic *acpi_eiointc); +int avecintc_acpi_init(struct irq_domain *parent); + +int htvec_acpi_init(struct irq_domain *parent, + struct acpi_madt_ht_pic *acpi_htvec); +int pch_lpc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lpc_pic *acpi_pchlpc); +int pch_pic_acpi_init(struct irq_domain *parent, + struct acpi_madt_bio_pic *acpi_pchpic); +int pch_msi_acpi_init(struct irq_domain *parent, + struct acpi_madt_msi_pic *acpi_pchmsi); +int pch_msi_acpi_init_avec(struct irq_domain *parent); + +#endif /* _DRIVERS_IRQCHIP_IRQ_LOONGSON_H */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 624d4a38c358a..5e23cc0ad4077 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -153,6 +153,10 @@ enum cpuhp_state { CPUHP_AP_IRQ_MIPS_GIC_STARTING, CPUHP_AP_IRQ_RISCV_STARTING, CPUHP_AP_IRQ_LOONGARCH_STARTING, +#ifdef CONFIG_LOONGARCH + CPUHP_AP_IRQ_EIOINTC_STARTING, + CPUHP_AP_IRQ_AVECINTC_STARTING, +#endif CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_MICROCODE_LOADER, From dd50566150526195e244ecae478e9eb4047e6bc0 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:48:00 +0800 Subject: [PATCH 09/19] LoongArch: Add CPU HWMon platform driver This add CPU HWMon (temperature sensor) platform driver for Loongson-3. Tested-by: Xi Ruoyao Signed-off-by: Huacai Chen Signed-off-by: Kexy Biscuit Signed-off-by: gaojuxin Signed-off-by: Binbin Zhou --- drivers/platform/loongarch/Kconfig | 8 + drivers/platform/loongarch/Makefile | 1 + drivers/platform/loongarch/cpu_hwmon.c | 196 +++++++++++++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 drivers/platform/loongarch/cpu_hwmon.c diff --git a/drivers/platform/loongarch/Kconfig b/drivers/platform/loongarch/Kconfig index 5633e4d73991a..9ec1a86ef7fae 100644 --- a/drivers/platform/loongarch/Kconfig +++ b/drivers/platform/loongarch/Kconfig @@ -16,6 +16,14 @@ menuconfig LOONGARCH_PLATFORM_DEVICES if LOONGARCH_PLATFORM_DEVICES +config CPU_HWMON + bool "Loongson CPU HWMon Driver" + depends on MACH_LOONGSON64 + select HWMON + default y + help + Loongson-3A/3B/3C CPU HWMon (temperature sensor) driver. + config LOONGSON_LAPTOP tristate "Generic Loongson-3 Laptop Driver" depends on ACPI diff --git a/drivers/platform/loongarch/Makefile b/drivers/platform/loongarch/Makefile index f43ab03db1a2d..695688bed4232 100644 --- a/drivers/platform/loongarch/Makefile +++ b/drivers/platform/loongarch/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_LOONGSON_LAPTOP) += loongson-laptop.o +obj-$(CONFIG_CPU_HWMON) += cpu_hwmon.o diff --git a/drivers/platform/loongarch/cpu_hwmon.c b/drivers/platform/loongarch/cpu_hwmon.c new file mode 100644 index 0000000000000..c705d088c44a8 --- /dev/null +++ b/drivers/platform/loongarch/cpu_hwmon.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include + +#include + +static int nr_packages; +static struct device *cpu_hwmon_dev; + +static int loongson3_cpu_temp(int cpu) +{ + u32 reg; + + reg = iocsr_read32(LOONGARCH_IOCSR_CPUTEMP) & 0xff; + + return (int)((s8)reg) * 1000; +} + +static ssize_t cpu_temp_label(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int id = (to_sensor_dev_attr(attr))->index - 1; + + return sprintf(buf, "CPU %d Temperature\n", id); +} + +static ssize_t get_cpu_temp(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int id = (to_sensor_dev_attr(attr))->index - 1; + int value = loongson3_cpu_temp(id); + + return sprintf(buf, "%d\n", value); +} + +static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2); +static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2); +static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3); +static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3); +static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp5_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp5_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp6_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp6_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp7_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp7_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp8_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp8_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp9_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp9_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp10_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp10_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp11_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp11_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp12_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp12_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp13_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp13_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp14_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp14_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp15_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp15_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp16_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp16_label, 0444, cpu_temp_label, NULL, 4); + +static struct attribute *cpu_hwmon_attributes[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, + &sensor_dev_attr_temp4_input.dev_attr.attr, + &sensor_dev_attr_temp4_label.dev_attr.attr, + &sensor_dev_attr_temp5_input.dev_attr.attr, + &sensor_dev_attr_temp5_label.dev_attr.attr, + &sensor_dev_attr_temp6_input.dev_attr.attr, + &sensor_dev_attr_temp6_label.dev_attr.attr, + &sensor_dev_attr_temp7_input.dev_attr.attr, + &sensor_dev_attr_temp7_label.dev_attr.attr, + &sensor_dev_attr_temp8_input.dev_attr.attr, + &sensor_dev_attr_temp8_label.dev_attr.attr, + &sensor_dev_attr_temp9_input.dev_attr.attr, + &sensor_dev_attr_temp9_label.dev_attr.attr, + &sensor_dev_attr_temp10_input.dev_attr.attr, + &sensor_dev_attr_temp10_label.dev_attr.attr, + &sensor_dev_attr_temp11_input.dev_attr.attr, + &sensor_dev_attr_temp11_label.dev_attr.attr, + &sensor_dev_attr_temp12_input.dev_attr.attr, + &sensor_dev_attr_temp12_label.dev_attr.attr, + &sensor_dev_attr_temp13_input.dev_attr.attr, + &sensor_dev_attr_temp13_label.dev_attr.attr, + &sensor_dev_attr_temp14_input.dev_attr.attr, + &sensor_dev_attr_temp14_label.dev_attr.attr, + &sensor_dev_attr_temp15_input.dev_attr.attr, + &sensor_dev_attr_temp15_label.dev_attr.attr, + &sensor_dev_attr_temp16_input.dev_attr.attr, + &sensor_dev_attr_temp16_label.dev_attr.attr, + NULL +}; +static umode_t cpu_hwmon_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + int id = i / 2; + + if (id < nr_packages) + return attr->mode; + return 0; +} + +static struct attribute_group cpu_hwmon_group = { + .attrs = cpu_hwmon_attributes, + .is_visible = cpu_hwmon_is_visible, +}; + +static const struct attribute_group *cpu_hwmon_groups[] = { + &cpu_hwmon_group, + NULL +}; + +static int cpu_initial_threshold = 72000; +static int cpu_thermal_threshold = 96000; +module_param(cpu_thermal_threshold, int, 0644); +MODULE_PARM_DESC(cpu_thermal_threshold, "cpu thermal threshold (96000 (default))"); + +static struct delayed_work thermal_work; + +static void do_thermal_timer(struct work_struct *work) +{ + int i, value, temp_max = 0; + + for (i = 0; i < nr_packages; i++) { + value = loongson3_cpu_temp(i); + if (value > temp_max) + temp_max = value; + } + + if (temp_max <= cpu_thermal_threshold) + schedule_delayed_work(&thermal_work, msecs_to_jiffies(5000)); + else + orderly_poweroff(true); +} + +static int __init loongson_hwmon_init(void) +{ + int i, value, temp_max = 0; + + pr_info("Loongson Hwmon Enter...\n"); + + nr_packages = loongson_sysconf.nr_cpus / + loongson_sysconf.cores_per_package; + + cpu_hwmon_dev = hwmon_device_register_with_groups(NULL, "cpu_hwmon", + NULL, cpu_hwmon_groups); + if (IS_ERR(cpu_hwmon_dev)) { + pr_err("Hwmon register fail with %ld!\n", PTR_ERR(cpu_hwmon_dev)); + return PTR_ERR(cpu_hwmon_dev); + } + + for (i = 0; i < nr_packages; i++) { + value = loongson3_cpu_temp(i); + if (value > temp_max) + temp_max = value; + } + + pr_info("Initial CPU temperature is %d (highest).\n", temp_max); + if (temp_max > cpu_initial_threshold) + cpu_thermal_threshold += temp_max - cpu_initial_threshold; + + INIT_DEFERRABLE_WORK(&thermal_work, do_thermal_timer); + schedule_delayed_work(&thermal_work, msecs_to_jiffies(20000)); + + return 0; +} + +static void __exit loongson_hwmon_exit(void) +{ + cancel_delayed_work_sync(&thermal_work); + hwmon_device_unregister(cpu_hwmon_dev); +} + +module_init(loongson_hwmon_init); +module_exit(loongson_hwmon_exit); + +MODULE_AUTHOR("Huacai Chen "); +MODULE_DESCRIPTION("Loongson CPU Hwmon driver"); +MODULE_LICENSE("GPL"); From f34084925938a90c5eeccd7cd62544024dba308a Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:50:11 +0800 Subject: [PATCH 10/19] LoongArch: Fix i2c related issues Signed-off-by: wanghongliang Signed-off-by: Binbin Zhou --- arch/loongarch/configs/loongson3_defconfig | 2 + drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-loongson-64bit.c | 298 ++++++++++++++++++--- drivers/i2c/busses/i2c-ls2x.c | 12 +- 4 files changed, 268 insertions(+), 46 deletions(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 0c0c86d82678e..55956cda2fa84 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -1674,6 +1674,8 @@ CONFIG_HID_ALPS=m CONFIG_HID_PID=y CONFIG_USB_HIDDEV=y CONFIG_I2C_HID=m +CONFIG_I2C_HID_ACPI=m +CONFIG_I2C_HID_OF=m CONFIG_USB_LED_TRIG=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index e48ade3456fd4..59aab5ec035a9 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -390,7 +390,7 @@ config GPIO_LOONGSON config GPIO_LOONGSON_64BIT tristate "Loongson 64 bit GPIO support" - depends on LOONGARCH || COMPILE_TEST + depends on LOONGARCH || (64BIT && COMPILE_TEST) depends on OF_GPIO select GPIO_GENERIC help diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c index 06213bbfabdd5..f916a5ae0290a 100644 --- a/drivers/gpio/gpio-loongson-64bit.c +++ b/drivers/gpio/gpio-loongson-64bit.c @@ -26,6 +26,7 @@ struct loongson_gpio_chip_data { unsigned int conf_offset; unsigned int out_offset; unsigned int in_offset; + unsigned int inten_offset; }; struct loongson_gpio_chip { @@ -33,6 +34,8 @@ struct loongson_gpio_chip { struct fwnode_handle *fwnode; spinlock_t lock; void __iomem *reg_base; + u16 *gsi_idx_map; + u16 mapsize; const struct loongson_gpio_chip_data *chip_data; }; @@ -41,15 +44,40 @@ static inline struct loongson_gpio_chip *to_loongson_gpio_chip(struct gpio_chip return container_of(chip, struct loongson_gpio_chip, chip); } -static inline void loongson_commit_direction(struct loongson_gpio_chip *lgpio, unsigned int pin, - int input) +static inline void loongson_commit_direction_bit(struct loongson_gpio_chip *lgpio, + unsigned int pin, int input) +{ + u64 temp; + + temp = readq(lgpio->reg_base + lgpio->chip_data->conf_offset); + if (input) + temp |= 1ULL << pin; + else + temp &= ~(1ULL << pin); + writeq(temp, lgpio->reg_base + lgpio->chip_data->conf_offset); +} + +static inline void loongson_commit_direction_byte(struct loongson_gpio_chip *lgpio, + unsigned int pin, int input) { u8 bval = input ? 1 : 0; writeb(bval, lgpio->reg_base + lgpio->chip_data->conf_offset + pin); } -static void loongson_commit_level(struct loongson_gpio_chip *lgpio, unsigned int pin, int high) +static void loongson_commit_level_bit(struct loongson_gpio_chip *lgpio, unsigned int pin, int high) +{ + u64 temp; + + temp = readq(lgpio->reg_base + lgpio->chip_data->out_offset); + if (high) + temp |= 1ULL << pin; + else + temp &= ~(1ULL << pin); + writeq(temp, lgpio->reg_base + lgpio->chip_data->out_offset); +} + +static void loongson_commit_level_byte(struct loongson_gpio_chip *lgpio, unsigned int pin, int high) { u8 bval = high ? 1 : 0; @@ -62,7 +90,10 @@ static int loongson_gpio_direction_input(struct gpio_chip *chip, unsigned int pi struct loongson_gpio_chip *lgpio = to_loongson_gpio_chip(chip); spin_lock_irqsave(&lgpio->lock, flags); - loongson_commit_direction(lgpio, pin, 1); + if (lgpio->chip_data->mode == BIT_CTRL_MODE) + loongson_commit_direction_bit(lgpio, pin, 1); + else + loongson_commit_direction_byte(lgpio, pin, 1); spin_unlock_irqrestore(&lgpio->lock, flags); return 0; @@ -74,8 +105,13 @@ static int loongson_gpio_direction_output(struct gpio_chip *chip, unsigned int p struct loongson_gpio_chip *lgpio = to_loongson_gpio_chip(chip); spin_lock_irqsave(&lgpio->lock, flags); - loongson_commit_level(lgpio, pin, value); - loongson_commit_direction(lgpio, pin, 0); + if (lgpio->chip_data->mode == BIT_CTRL_MODE) { + loongson_commit_level_bit(lgpio, pin, value); + loongson_commit_direction_bit(lgpio, pin, 0); + } else { + loongson_commit_level_byte(lgpio, pin, value); + loongson_commit_direction_byte(lgpio, pin, 0); + } spin_unlock_irqrestore(&lgpio->lock, flags); return 0; @@ -84,11 +120,17 @@ static int loongson_gpio_direction_output(struct gpio_chip *chip, unsigned int p static int loongson_gpio_get(struct gpio_chip *chip, unsigned int pin) { u8 bval; + u64 qval; int val; struct loongson_gpio_chip *lgpio = to_loongson_gpio_chip(chip); - bval = readb(lgpio->reg_base + lgpio->chip_data->in_offset + pin); - val = bval & 1; + if (lgpio->chip_data->mode == BIT_CTRL_MODE) { + qval = readq(lgpio->reg_base + lgpio->chip_data->in_offset); + val = ((qval & (1ULL << pin)) != 0); + } else { + bval = readb(lgpio->reg_base + lgpio->chip_data->in_offset + pin); + val = (bval & 1); + } return val; } @@ -96,13 +138,22 @@ static int loongson_gpio_get(struct gpio_chip *chip, unsigned int pin) static int loongson_gpio_get_direction(struct gpio_chip *chip, unsigned int pin) { u8 bval; + u64 qval; + int val; struct loongson_gpio_chip *lgpio = to_loongson_gpio_chip(chip); - bval = readb(lgpio->reg_base + lgpio->chip_data->conf_offset + pin); - if (bval & 1) - return GPIO_LINE_DIRECTION_IN; + if (lgpio->chip_data->mode == BIT_CTRL_MODE) { + qval = readq(lgpio->reg_base + lgpio->chip_data->conf_offset); + val = ((qval & (1ULL << pin)) != 0); + } else { + bval = readb(lgpio->reg_base + lgpio->chip_data->conf_offset + pin); + val = bval & 1; + } + + if (val) + return 1; - return GPIO_LINE_DIRECTION_OUT; + return 0; } static void loongson_gpio_set(struct gpio_chip *chip, unsigned int pin, int value) @@ -111,52 +162,73 @@ static void loongson_gpio_set(struct gpio_chip *chip, unsigned int pin, int valu struct loongson_gpio_chip *lgpio = to_loongson_gpio_chip(chip); spin_lock_irqsave(&lgpio->lock, flags); - loongson_commit_level(lgpio, pin, value); + if (lgpio->chip_data->mode == BIT_CTRL_MODE) + loongson_commit_level_bit(lgpio, pin, value); + else + loongson_commit_level_byte(lgpio, pin, value); spin_unlock_irqrestore(&lgpio->lock, flags); } static int loongson_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) { + unsigned int u; struct platform_device *pdev = to_platform_device(chip->parent); + struct loongson_gpio_chip *lgpio = to_loongson_gpio_chip(chip); + + if (lgpio->chip_data->mode == BIT_CTRL_MODE) { + /* Get the register index from offset then multiply by bytes per register */ + u = readl(lgpio->reg_base + lgpio->chip_data->inten_offset + (offset / 32) * 4); + u |= BIT(offset % 32); + writel(u, lgpio->reg_base + lgpio->chip_data->inten_offset + (offset / 32) * 4); + } else { + writeb(1, lgpio->reg_base + lgpio->chip_data->inten_offset + offset); + } + + if ((lgpio->gsi_idx_map != NULL) && (offset < lgpio->mapsize)) + offset = lgpio->gsi_idx_map[offset]; return platform_get_irq(pdev, offset); } static int loongson_gpio_init(struct device *dev, struct loongson_gpio_chip *lgpio, - struct device_node *np, void __iomem *reg_base) + void __iomem *reg_base) { - int ret; u32 ngpios; + u32 gpio_base; + int rval; lgpio->reg_base = reg_base; - - if (lgpio->chip_data->mode == BIT_CTRL_MODE) { - ret = bgpio_init(&lgpio->chip, dev, 8, - lgpio->reg_base + lgpio->chip_data->in_offset, - lgpio->reg_base + lgpio->chip_data->out_offset, - NULL, NULL, - lgpio->reg_base + lgpio->chip_data->conf_offset, - 0); - if (ret) { - dev_err(dev, "unable to init generic GPIO\n"); - return ret; + lgpio->chip.direction_input = loongson_gpio_direction_input; + lgpio->chip.get = loongson_gpio_get; + lgpio->chip.get_direction = loongson_gpio_get_direction; + lgpio->chip.direction_output = loongson_gpio_direction_output; + lgpio->chip.set = loongson_gpio_set; + lgpio->chip.parent = dev; + device_property_read_u32(dev, "gpio_base", &gpio_base); + if (!gpio_base) + gpio_base = -1; + lgpio->chip.base = gpio_base; + device_property_read_u32(dev, "ngpios", &ngpios); + lgpio->chip.ngpio = ngpios; + rval = device_property_read_u16_array(dev, "gsi_idx_map", NULL, 0); + if (rval > 0) { + lgpio->gsi_idx_map = + kmalloc_array(rval, sizeof(*lgpio->gsi_idx_map), + GFP_KERNEL); + if (unlikely(!lgpio->gsi_idx_map)) { + dev_err(dev, "Alloc gsi_idx_map fail!\n"); + } else { + lgpio->mapsize = rval; + device_property_read_u16_array(dev, "gsi_idx_map", + lgpio->gsi_idx_map, lgpio->mapsize); } - } else { - lgpio->chip.direction_input = loongson_gpio_direction_input; - lgpio->chip.get = loongson_gpio_get; - lgpio->chip.get_direction = loongson_gpio_get_direction; - lgpio->chip.direction_output = loongson_gpio_direction_output; - lgpio->chip.set = loongson_gpio_set; - lgpio->chip.parent = dev; - spin_lock_init(&lgpio->lock); } + spin_lock_init(&lgpio->lock); - device_property_read_u32(dev, "ngpios", &ngpios); - - lgpio->chip.can_sleep = 0; - lgpio->chip.ngpio = ngpios; lgpio->chip.label = lgpio->chip_data->label; - lgpio->chip.to_irq = loongson_gpio_to_irq; + lgpio->chip.can_sleep = false; + if (lgpio->chip_data->inten_offset) + lgpio->chip.to_irq = loongson_gpio_to_irq; return devm_gpiochip_add_data(dev, &lgpio->chip, lgpio); } @@ -165,7 +237,6 @@ static int loongson_gpio_probe(struct platform_device *pdev) { void __iomem *reg_base; struct loongson_gpio_chip *lgpio; - struct device_node *np = pdev->dev.of_node; struct device *dev = &pdev->dev; lgpio = devm_kzalloc(dev, sizeof(*lgpio), GFP_KERNEL); @@ -178,7 +249,7 @@ static int loongson_gpio_probe(struct platform_device *pdev) if (IS_ERR(reg_base)) return PTR_ERR(reg_base); - return loongson_gpio_init(dev, lgpio, np, reg_base); + return loongson_gpio_init(dev, lgpio, reg_base); } static const struct loongson_gpio_chip_data loongson_gpio_ls2k_data = { @@ -187,6 +258,60 @@ static const struct loongson_gpio_chip_data loongson_gpio_ls2k_data = { .conf_offset = 0x0, .in_offset = 0x20, .out_offset = 0x10, + .inten_offset = 0x30, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls2k0500_data0 = { + .label = "ls2k0500_gpio", + .mode = BIT_CTRL_MODE, + .conf_offset = 0x0, + .in_offset = 0x8, + .out_offset = 0x10, + .inten_offset = 0xb0, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls2k0500_data1 = { + .label = "ls2k0500_gpio", + .mode = BIT_CTRL_MODE, + .conf_offset = 0x0, + .in_offset = 0x8, + .out_offset = 0x10, + .inten_offset = 0x98, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls2k2000_data0 = { + .label = "ls2k2000_gpio", + .mode = BIT_CTRL_MODE, + .conf_offset = 0x0, + .in_offset = 0xc, + .out_offset = 0x8, + .inten_offset = 0x14, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls2k2000_data1 = { + .label = "ls2k2000_gpio", + .mode = BIT_CTRL_MODE, + .conf_offset = 0x0, + .in_offset = 0x20, + .out_offset = 0x10, + .inten_offset = 0x30, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls2k2000_data2 = { + .label = "ls2k2000_gpio", + .mode = BIT_CTRL_MODE, + .conf_offset = 0x4, + .in_offset = 0x8, + .out_offset = 0x0, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls3a5000_data = { + .label = "ls3a5000_gpio", + .mode = BIT_CTRL_MODE, + .conf_offset = 0x0, + .in_offset = 0xc, + .out_offset = 0x8, + .inten_offset = 0x14, }; static const struct loongson_gpio_chip_data loongson_gpio_ls7a_data = { @@ -195,6 +320,33 @@ static const struct loongson_gpio_chip_data loongson_gpio_ls7a_data = { .conf_offset = 0x800, .in_offset = 0xa00, .out_offset = 0x900, + .inten_offset = 0xb00, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data0 = { + .label = "ls7a2000_gpio", + .mode = BYTE_CTRL_MODE, + .conf_offset = 0x800, + .in_offset = 0xa00, + .out_offset = 0x900, + .inten_offset = 0xb00, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data1 = { + .label = "ls7a2000_gpio", + .mode = BYTE_CTRL_MODE, + .conf_offset = 0x84, + .in_offset = 0x88, + .out_offset = 0x80, +}; + +static const struct loongson_gpio_chip_data loongson_gpio_ls3a6000_data = { + .label = "ls3a6000_gpio", + .mode = BIT_CTRL_MODE, + .conf_offset = 0x0, + .in_offset = 0xc, + .out_offset = 0x8, + .inten_offset = 0x14, }; static const struct of_device_id loongson_gpio_of_match[] = { @@ -202,10 +354,46 @@ static const struct of_device_id loongson_gpio_of_match[] = { .compatible = "loongson,ls2k-gpio", .data = &loongson_gpio_ls2k_data, }, + { + .compatible = "loongson,ls2k0500-gpio0", + .data = &loongson_gpio_ls2k0500_data0, + }, + { + .compatible = "loongson,ls2k0500-gpio1", + .data = &loongson_gpio_ls2k0500_data1, + }, + { + .compatible = "loongson,ls2k2000-gpio0", + .data = &loongson_gpio_ls2k2000_data0, + }, + { + .compatible = "loongson,ls2k2000-gpio1", + .data = &loongson_gpio_ls2k2000_data1, + }, + { + .compatible = "loongson,ls2k2000-gpio2", + .data = &loongson_gpio_ls2k2000_data2, + }, + { + .compatible = "loongson,ls3a5000-gpio", + .data = &loongson_gpio_ls3a5000_data, + }, { .compatible = "loongson,ls7a-gpio", .data = &loongson_gpio_ls7a_data, }, + { + .compatible = "loongson,ls7a2000-gpio1", + .data = &loongson_gpio_ls7a2000_data0, + }, + { + .compatible = "loongson,ls7a2000-gpio2", + .data = &loongson_gpio_ls7a2000_data1, + }, + { + .compatible = "loongson,ls3a6000-gpio", + .data = &loongson_gpio_ls3a6000_data, + }, {} }; MODULE_DEVICE_TABLE(of, loongson_gpio_of_match); @@ -215,6 +403,34 @@ static const struct acpi_device_id loongson_gpio_acpi_match[] = { .id = "LOON0002", .driver_data = (kernel_ulong_t)&loongson_gpio_ls7a_data, }, + { + .id = "LOON0007", + .driver_data = (kernel_ulong_t)&loongson_gpio_ls3a5000_data, + }, + { + .id = "LOON000A", + .driver_data = (kernel_ulong_t)&loongson_gpio_ls2k2000_data0, + }, + { + .id = "LOON000B", + .driver_data = (kernel_ulong_t)&loongson_gpio_ls2k2000_data1, + }, + { + .id = "LOON000C", + .driver_data = (kernel_ulong_t)&loongson_gpio_ls2k2000_data2, + }, + { + .id = "LOON000D", + .driver_data = (kernel_ulong_t)&loongson_gpio_ls7a2000_data0, + }, + { + .id = "LOON000E", + .driver_data = (kernel_ulong_t)&loongson_gpio_ls7a2000_data1, + }, + { + .id = "LOON000F", + .driver_data = (kernel_ulong_t)&loongson_gpio_ls3a6000_data, + }, {} }; MODULE_DEVICE_TABLE(acpi, loongson_gpio_acpi_match); diff --git a/drivers/i2c/busses/i2c-ls2x.c b/drivers/i2c/busses/i2c-ls2x.c index ebae6035701db..871ceedec74f1 100644 --- a/drivers/i2c/busses/i2c-ls2x.c +++ b/drivers/i2c/busses/i2c-ls2x.c @@ -26,7 +26,8 @@ #include /* I2C Registers */ -#define I2C_LS2X_PRER 0x0 /* Freq Division Register(16 bits) */ +#define I2C_LS2X_PRER_LO 0x0 /* Freq Division Register Lo(8 bits) */ +#define I2C_LS2X_PRER_HI 0x1 /* Freq Division Register Hi(8 bits) */ #define I2C_LS2X_CTR 0x2 /* Control Register */ #define I2C_LS2X_TXR 0x3 /* Transport Data Register */ #define I2C_LS2X_RXR 0x3 /* Receive Data Register */ @@ -96,6 +97,7 @@ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) struct i2c_timings *t = &priv->i2c_t; struct device *dev = priv->adapter.dev.parent; u32 acpi_speed = i2c_acpi_find_bus_speed(dev); + u16 prer_val; i2c_parse_fw_timings(dev, t, false); @@ -104,9 +106,11 @@ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) else t->bus_freq_hz = LS2X_I2C_FREQ_STD; - /* Calculate and set i2c frequency. */ - writew(LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1, - priv->base + I2C_LS2X_PRER); + prer_val = LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1; + + /* set i2c frequency. */ + writeb(prer_val & 0xFF, priv->base + I2C_LS2X_PRER_LO); + writeb((prer_val & 0xFF00) >> 8, priv->base + I2C_LS2X_PRER_HI); } static void ls2x_i2c_init(struct ls2x_i2c_priv *priv) From f996548addd30a7cedd70693ec4e26b3849f9b07 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:51:08 +0800 Subject: [PATCH 11/19] LoongArch: Adjust the calculation of the number of packages Signed-off-by: wanghongliang Signed-off-by: Binbin Zhou --- arch/loongarch/include/asm/topology.h | 4 ++++ arch/loongarch/kernel/setup.c | 1 + arch/loongarch/kernel/smp.c | 3 +++ drivers/platform/loongarch/cpu_hwmon.c | 3 +-- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index 66128dec0bf6e..379f5e4830ebd 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -29,10 +29,14 @@ void numa_set_distance(int from, int to, int distance); #endif #ifdef CONFIG_SMP +extern unsigned int __max_packages; +#define topology_max_packages() (__max_packages) #define topology_physical_package_id(cpu) (cpu_data[cpu].package) #define topology_core_id(cpu) (cpu_data[cpu].core) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu]) +#else +#define topology_max_packages() (1) #endif #include diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 154796a8a9271..4f5eee23a31d6 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -133,6 +133,7 @@ static void __init parse_cpu_table(const struct dmi_header *dm) loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]); loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_THREAD_PACKAGE_OFFSET); + __max_packages++; pr_info("CpuClock = %llu\n", cpu_clock_freq); } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 78b42d06bc06f..92d827752f8bb 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -75,6 +75,9 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { [IPI_CLEAR_VECTOR] = "Clear vector interrupts", }; +unsigned int __max_packages __read_mostly; +EXPORT_SYMBOL(__max_packages); + void show_ipi_list(struct seq_file *p, int prec) { unsigned int cpu, i; diff --git a/drivers/platform/loongarch/cpu_hwmon.c b/drivers/platform/loongarch/cpu_hwmon.c index c705d088c44a8..6e6088a43a4d9 100644 --- a/drivers/platform/loongarch/cpu_hwmon.c +++ b/drivers/platform/loongarch/cpu_hwmon.c @@ -156,8 +156,7 @@ static int __init loongson_hwmon_init(void) pr_info("Loongson Hwmon Enter...\n"); - nr_packages = loongson_sysconf.nr_cpus / - loongson_sysconf.cores_per_package; + nr_packages = topology_max_packages(); cpu_hwmon_dev = hwmon_device_register_with_groups(NULL, "cpu_hwmon", NULL, cpu_hwmon_groups); From 402112e687f6052a816697ae3f8a3acece998244 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 15 Feb 2025 11:25:16 +0800 Subject: [PATCH 12/19] irqchip/loongarch-avec: Add multi-nodes topology support commit 2af257388473298898d71313cfa6092b572f2602 upstream. avecintc_init() enables the Advanced Interrupt Controller (AVEC) of the boot CPU node, but nothing enables the AVEC on secondary nodes. Move the enablement to the CPU hotplug callback so that secondary nodes get the AVEC enabled too. In theory enabling it once per node would be sufficient, but redundant enabling does no hard, so keep the code simple and do it unconditionally. Signed-off-by: Tianyang Zhang Signed-off-by: Binbin Zhou --- drivers/irqchip/irq-loongarch-avec.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-loongarch-avec.c b/drivers/irqchip/irq-loongarch-avec.c index e8c43b3fd8266..8e1172f337291 100644 --- a/drivers/irqchip/irq-loongarch-avec.c +++ b/drivers/irqchip/irq-loongarch-avec.c @@ -55,6 +55,15 @@ struct avecintc_data { unsigned int moving; }; +static inline void avecintc_enable(void) +{ + u64 value; + + value = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC); + value |= IOCSR_MISC_FUNC_AVEC_EN; + iocsr_write64(value, LOONGARCH_IOCSR_MISC_FUNC); +} + static inline void avecintc_ack_irq(struct irq_data *d) { } @@ -137,6 +146,8 @@ static int avecintc_cpu_online(unsigned int cpu) raw_spin_lock(&loongarch_avec.lock); + avecintc_enable(); + irq_matrix_online(loongarch_avec.vector_matrix); pending_list_init(cpu); @@ -365,7 +376,6 @@ static int __init irq_matrix_init(void) static int __init avecintc_init(struct irq_domain *parent) { int ret, parent_irq; - unsigned long value; raw_spin_lock_init(&loongarch_avec.lock); @@ -404,9 +414,7 @@ static int __init avecintc_init(struct irq_domain *parent) "irqchip/loongarch/avecintc:starting", avecintc_cpu_online, avecintc_cpu_offline); #endif - value = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC); - value |= IOCSR_MISC_FUNC_AVEC_EN; - iocsr_write64(value, LOONGARCH_IOCSR_MISC_FUNC); + avecintc_enable(); return ret; From c9f490bdc4038f2a0ff7af914c60071874787231 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:50:41 +0800 Subject: [PATCH 13/19] irqchip/loongson-eiointc: Fix external irq route error Signed-off-by: wanghongliang Signed-off-by: Binbin Zhou --- drivers/irqchip/irq-loongson-eiointc.c | 46 +++++++++++++++++++------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 0799a3a72ed85..c64dbcdf48b94 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -45,6 +45,16 @@ #define MAX_EIO_NODES (NR_CPUS / CORES_PER_EIO_NODE) +typedef struct { DECLARE_BITMAP(bits, MAX_EIO_NODES); } extioi_node_map; + +#define EXTIOI_NODE_MASK_NONE \ +((extioi_node_map) { { \ + [0 ... BITS_TO_LONGS(MAX_EIO_NODES)-1] = 0UL \ +} }) + +extioi_node_map extioi_node_maps = EXTIOI_NODE_MASK_NONE; + + static int nr_pics; struct eiointc_priv { @@ -78,6 +88,7 @@ static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode, int i, node, cpu_node, route_node; unsigned char coremap; uint32_t pos_off, data, data_byte, data_mask; + nodemask_t eio_node_map = *node_map; pos_off = pos & ~3; data_byte = pos & 3; @@ -89,13 +100,14 @@ static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode, for_each_online_cpu(i) { node = cpu_to_eio_node(i); - if (!node_isset(node, *node_map)) - continue; - - /* EIO node 0 is in charge of inter-node interrupt dispatch */ - route_node = (node == mnode) ? cpu_node : node; - data = ((coremap | (route_node << 4)) << (data_byte * 8)); - csr_any_send(EIOINTC_REG_ROUTE + pos_off, data, data_mask, node * CORES_PER_EIO_NODE); + if (node_isset(node, eio_node_map)) { + node_clear(node, eio_node_map); + /* EIO node 0 is in charge of inter-node interrupt dispatch */ + route_node = (node == mnode) ? cpu_node : node; + data = ((coremap | (route_node << 4)) << (data_byte * 8)); + csr_any_send(EIOINTC_REG_ROUTE + pos_off, data, data_mask, + cpu_logical_map(i)); + } } } @@ -103,7 +115,7 @@ static DEFINE_RAW_SPINLOCK(affinity_lock); static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force) { - unsigned int cpu; + unsigned int cpu, i; unsigned long flags; uint32_t vector, regaddr; struct cpumask intersect_affinity; @@ -123,16 +135,21 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af vector = d->hwirq; regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2); + for_each_online_cpu(i) { + if (cpu_to_eio_node(i) == priv->node) + break; + } + /* Mask target vector */ csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)), - 0x0, priv->node * CORES_PER_EIO_NODE); + 0x0, cpu_logical_map(i)); /* Set route for target vector */ eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map); /* Unmask target vector */ csr_any_send(regaddr, EIOINTC_ALL_ENABLE, - 0x0, priv->node * CORES_PER_EIO_NODE); + 0x0, cpu_logical_map(i)); irq_data_update_effective_affinity(d, cpumask_of(cpu)); @@ -184,7 +201,9 @@ static int eiointc_router_init(unsigned int cpu) if (index == 0) bit = BIT(cpu_logical_map(0)); else - bit = (eiointc_priv[index]->node << 4) | 1; + bit = (eiointc_priv[index]->node << 4) | + BIT(cpu_logical_map(smp_processor_id()) % + CORES_PER_EIO_NODE); data = bit | (bit << 8) | (bit << 16) | (bit << 24); iocsr_write32(data, EIOINTC_REG_ROUTE + i * 4); @@ -193,8 +212,10 @@ static int eiointc_router_init(unsigned int cpu) for (i = 0; i < eiointc_priv[0]->vec_count / 32; i++) { data = 0xffffffff; iocsr_write32(data, EIOINTC_REG_ENABLE + i * 4); - iocsr_write32(data, EIOINTC_REG_BOUNCE + i * 4); + iocsr_write32(0, EIOINTC_REG_BOUNCE + i * 4); } + + set_bit(node, (&extioi_node_maps)->bits); } return 0; @@ -324,6 +345,7 @@ static int eiointc_suspend(void) static void eiointc_resume(void) { + extioi_node_maps = EXTIOI_NODE_MASK_NONE; eiointc_router_init(0); } From 6de7532625b7b17a3f91285e68cc433981c353b5 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 15 Feb 2025 16:51:24 +0800 Subject: [PATCH 14/19] PCI: Prevent LS7A Bus Master clearing on kexec This is similar to commit 62b6dee1b44aa23b39355 ("PCI/portdrv: Prevent LS7A Bus Master clearing on shutdown"), which prevents LS7A Bus Master clearing on kexec. Only skip Bus Master clearing on bridges because endpoint devices still need it. Signed-off-by: Ming Wang Signed-off-by: Huacai Chen Signed-off-by: Binbin Zhou --- drivers/pci/pci-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 9c59bf03d6579..28d1f98eef546 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -526,7 +526,7 @@ static void pci_device_shutdown(struct device *dev) * If it is not a kexec reboot, firmware will hit the PCI * devices with big hammer and stop their DMA any way. */ - if (kexec_in_progress && (pci_dev->current_state <= PCI_D3hot)) + if (kexec_in_progress && !pci_is_bridge(pci_dev) && (pci_dev->current_state <= PCI_D3hot)) pci_clear_master(pci_dev); } From 045c9905e81120348e8db96eba1bffc278962644 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:48:41 +0800 Subject: [PATCH 15/19] pci/quirks: LS7A2000: Fix pm transition of devices under pcie port The CPU may not be able to access the PCI header of the device if it is in low-power mode. Signed-off-by: Zhao Qunqin Signed-off-by: Binbin Zhou --- drivers/pci/quirks.c | 30 ++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 31 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e7daea73b61d8..e18437a9eb13c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -396,6 +396,36 @@ static void quirk_tigerpoint_bm_sts(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGP_LPC, quirk_tigerpoint_bm_sts); #endif +#define DEV_PCIE_PORT_4 0x7a39 +#define DEV_PCIE_PORT_5 0x7a49 +#define DEV_PCIE_PORT_6 0x7a59 +#define DEV_PCIE_PORT_7 0x7a69 +static void loongson_d3_and_link_quirk(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + struct pci_dev *bridge; + static const struct pci_device_id bridge_devids[] = { + { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_4) }, + { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_5) }, + { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_6) }, + { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_7) }, + { 0, }, + }; + + /* look for the matching bridge */ + while (!pci_is_root_bus(bus)) { + bridge = bus->self; + bus = bus->parent; + if (bridge && pci_match_id(bridge_devids, bridge)) { + dev->dev_flags |= (PCI_DEV_FLAGS_NO_D3 | + PCI_DEV_FLAGS_NO_LINK_SPEED_CHANGE); + dev->no_d1d2 = 1; + break; + } + } +} +DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_d3_and_link_quirk); + /* Chipsets where PCI->PCI transfers vanish or hang */ static void quirk_nopcipci(struct pci_dev *dev) { diff --git a/include/linux/pci.h b/include/linux/pci.h index eb33196d0866c..22ccd11b2e219 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -245,6 +245,7 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + PCI_DEV_FLAGS_NO_LINK_SPEED_CHANGE = (__force pci_dev_flags_t) (1 << 15), }; enum pci_irq_reroute_variant { From 4985495f06d6d189e49dcb7a83dd7d43d3a52276 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 15 Feb 2025 14:06:13 +0800 Subject: [PATCH 16/19] PCI/ACPI: Increase Loongson max PCI hosts to 8 commit 1f35a0c74e441e1a21b5414c25bc01f06e9cca31 upstream. Beginning with Loongson-3C6000, there can be up to 8 PCI hosts for multi-chip machines. To support these machines, increase the number of entries in mcfg_quirks to 8. Link: https://lore.kernel.org/r/20240726092911.2042656-1-chenhuacai@loongson.cn Signed-off-by: Haowei Zheng Signed-off-by: Huacai Chen Signed-off-by: Bjorn Helgaas Signed-off-by: Binbin Zhou --- drivers/acpi/pci_mcfg.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 860014b89b8eb..58e10a9801148 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -181,6 +181,18 @@ static struct mcfg_fixup mcfg_quirks[] = { LOONGSON_ECAM_MCFG("LOONGSON", 0), LOONGSON_ECAM_MCFG("\0", 1), LOONGSON_ECAM_MCFG("LOONGSON", 1), + LOONGSON_ECAM_MCFG("\0", 2), + LOONGSON_ECAM_MCFG("LOONGSON", 2), + LOONGSON_ECAM_MCFG("\0", 3), + LOONGSON_ECAM_MCFG("LOONGSON", 3), + LOONGSON_ECAM_MCFG("\0", 4), + LOONGSON_ECAM_MCFG("LOONGSON", 4), + LOONGSON_ECAM_MCFG("\0", 5), + LOONGSON_ECAM_MCFG("LOONGSON", 5), + LOONGSON_ECAM_MCFG("\0", 6), + LOONGSON_ECAM_MCFG("LOONGSON", 6), + LOONGSON_ECAM_MCFG("\0", 7), + LOONGSON_ECAM_MCFG("LOONGSON", 7), #endif /* LOONGARCH */ }; From de4ab3e5c94cf15a6b061b5eed9e349ca64362af Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:47:16 +0800 Subject: [PATCH 17/19] drm/loongson: Compile loongson drm driver as module Loongson drm driver will not be used in the future, use loongson drm dkms package instead. Signed-off-by: Hongchen Zhang --- arch/loongarch/configs/loongson3_defconfig | 2 +- drivers/gpu/drm/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 55956cda2fa84..75f0caefacc5f 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -1476,7 +1476,7 @@ CONFIG_DRM_AST=y CONFIG_DRM_MGAG200=m CONFIG_DRM_QXL=m CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_LOONGSON=y +CONFIG_DRM_LOONGSON=m CONFIG_DRM_BOCHS=m CONFIG_DRM_CIRRUS_QEMU=m CONFIG_FB=y diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 00f8d2f85e1cb..896284c7327d1 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -198,6 +198,6 @@ obj-y += gud/ obj-$(CONFIG_DRM_HYPERV) += hyperv/ obj-y += solomon/ obj-$(CONFIG_DRM_SPRD) += sprd/ -obj-$(CONFIG_DRM_LOONGSON) += loongson/ +obj-y += loongson/ obj-$(CONFIG_DRM_PHYTIUM) += phytium/ obj-$(CONFIG_DRM_ARISE) += arise/ From dc2b4d19800d25c642489b71475da1d0b3ef975a Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:42:08 +0800 Subject: [PATCH 18/19] cpufreq: loongson3-acpi: Initialize scaling_cur_freq correctly The policy->cur was not being initialized properly during CPU initialization, leading to it always reporting 0. This commit addresses this issue by setting the initial frequency to the normal maximum frequency. This ensures that the current frequency reflects the actual CPU operating frequency. Signed-off-by: Ming Wang Signed-off-by: Binbin Zhou --- drivers/cpufreq/loongson3-acpi-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/loongson3-acpi-cpufreq.c b/drivers/cpufreq/loongson3-acpi-cpufreq.c index 0c8155b8995a1..b50b936a70076 100644 --- a/drivers/cpufreq/loongson3-acpi-cpufreq.c +++ b/drivers/cpufreq/loongson3-acpi-cpufreq.c @@ -1241,6 +1241,8 @@ static int loongson3_cpufreq_cpu_init(struct cpufreq_policy *policy) if (has_boost_freq() && boost_supported()) loongson3_cpufreq_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + policy->cur = core->normal_max_freq * 1000; + pr_info("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) pr_debug(" %cP%d: %d MHz, %d mW, %d uS %d level\n", From 2755f1e91ae77f50b9fe491bbdda97e1baaae18f Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 14 Feb 2025 17:49:18 +0800 Subject: [PATCH 19/19] net: stmmac: dwmac-loongson: Add loongson_dwmac_fix_reset() callback Loongson's DWMAC device may take nearly two seconds to complete DMA reset, however, the default waiting time for reset is 200 milliseconds. Fixes: 803fc61df261 ("net: stmmac: dwmac-loongson: Add Loongson Multi-channels GMAC support") Signed-off-by: Qunqin Zhao Reviewed-by: Huacai Chen Signed-off-by: Binbin Zhou --- .../net/ethernet/stmicro/stmmac/dwmac-loongson.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index fd85f8784649a..dede4babe9272 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -469,6 +469,19 @@ static int loongson_dwmac_dt_config(struct pci_dev *pdev, return 0; } +/* Loongson's DWMAC device may take nearly two seconds to complete DMA reset */ +static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr + DMA_BUS_MODE); + + value |= DMA_BUS_MODE_SFT_RESET; + writel(value, ioaddr + DMA_BUS_MODE); + + return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, + !(value & DMA_BUS_MODE_SFT_RESET), + 10000, 2000000); +} + static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct plat_stmmacenet_data *plat; @@ -521,6 +534,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->bsp_priv = ld; plat->setup = loongson_dwmac_setup; + plat->fix_soc_reset = loongson_dwmac_fix_reset; ld->dev = &pdev->dev; if (dev_of_node(&pdev->dev)) {