@@ -10,6 +10,22 @@
# Rely on implicit context synchronization as a result of exception return
# when returning from IPI handler, and when returning to user-space.
#
# * riscv
#
# riscv uses xRET as return from interrupt and to return to user-space.
#
# Given that xRET is not core serializing, we rely on FENCE.I for providing
# core serialization:
#
# - by calling sync_core_before_usermode() on return from interrupt (cf.
# ipi_sync_core()),
#
# - via switch_mm() and sync_core_before_usermode() (respectively, for
# uthread->uthread and kthread->uthread transitions) before returning
# to user-space.
#
# The serialization in switch_mm() is activated by prepare_sync_core_cmd().
#
# * x86
#
# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
@@ -44,7 +60,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
| riscv: | TODO |
| riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
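The riscv note above describes the kernel side of MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE. For context, here is a minimal user-space sketch of the JIT-style pattern the command serves; this is an illustrative example, not part of this series, the function name jit_publish is made up, and error handling is elided:

#include <linux/membarrier.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Publish freshly written instructions to all threads of this process. */
static int jit_publish(void *code_page, const void *insns, size_t len)
{
	/* Registration is required once per process (idempotent). */
	if (syscall(__NR_membarrier,
		    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0))
		return -1;

	memcpy(code_page, insns, len);

	/*
	 * Guarantees every thread of the process executes a core
	 * serializing instruction (FENCE.I on riscv) before it next
	 * runs user code, so stale icache contents cannot be executed.
	 */
	return syscall(__NR_membarrier,
		       MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);
}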
1 change: 1 addition & 0 deletions Documentation/scheduler/index.rst
@@ -7,6 +7,7 @@ Scheduler


completion
membarrier
sched-arch
sched-bwc
sched-deadline
39 changes: 39 additions & 0 deletions Documentation/scheduler/membarrier.rst
@@ -0,0 +1,39 @@
.. SPDX-License-Identifier: GPL-2.0

========================
membarrier() System Call
========================

MEMBARRIER_CMD_{PRIVATE,GLOBAL}_EXPEDITED - Architecture requirements
=====================================================================

Memory barriers before updating rq->curr
----------------------------------------

The commands MEMBARRIER_CMD_PRIVATE_EXPEDITED and MEMBARRIER_CMD_GLOBAL_EXPEDITED
require each architecture to have a full memory barrier after coming from
user-space, before updating rq->curr. This barrier is implied by the sequence
rq_lock(); smp_mb__after_spinlock() in __schedule(). The barrier matches a full
barrier in the proximity of the membarrier system call exit, cf.
membarrier_{private,global}_expedited().

Memory barriers after updating rq->curr
---------------------------------------

The commands MEMBARRIER_CMD_PRIVATE_EXPEDITED and MEMBARRIER_CMD_GLOBAL_EXPEDITED
require each architecture to have a full memory barrier after updating rq->curr,
before returning to user-space. The schemes providing this barrier on the various
architectures are as follows.

- alpha, arc, arm, hexagon, mips rely on the full barrier implied by
spin_unlock() in finish_lock_switch().

- arm64 relies on the full barrier implied by switch_to().

- powerpc, riscv, s390, sparc, x86 rely on the full barrier implied by
switch_mm(), if mm is not NULL; they rely on the full barrier implied
by mmdrop(), otherwise. On powerpc and riscv, switch_mm() relies on
membarrier_arch_switch_mm().

The barrier matches a full barrier in the proximity of the membarrier system call
entry, cf. membarrier_{private,global}_expedited().
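These ordering guarantees are what let user-space treat the expedited commands as an asymmetric, process-wide memory barrier: many fast-path threads can rely on a rare membarrier() caller's barriers instead of issuing their own. A hedged user-space sketch of that pattern (illustrative names, error handling elided; not part of this series):

#include <linux/membarrier.h>
#include <stdatomic.h>
#include <sys/syscall.h>
#include <unistd.h>

static _Atomic int reader_active;

/* Called once at process start-up; registration is required before
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED may be used. */
static int barrier_init(void)
{
	return syscall(__NR_membarrier,
		       MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0);
}

/* Fast path, many threads: a relaxed store suffices here. */
static void reader_enter(void)
{
	atomic_store_explicit(&reader_active, 1, memory_order_relaxed);
}

/* Slow path, rare: acts as a full memory barrier on every thread of
 * the process, by virtue of the rq->curr ordering documented above. */
static int barrier_all_threads(void)
{
	return syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0);
}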
4 changes: 3 additions & 1 deletion MAINTAINERS
@@ -13732,7 +13732,9 @@ M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
M: "Paul E. McKenney" <paulmck@kernel.org>
L: linux-kernel@vger.kernel.org
S: Supported
F: arch/powerpc/include/asm/membarrier.h
F: Documentation/scheduler/membarrier.rst
F: arch/*/include/asm/membarrier.h
F: arch/*/include/asm/sync_core.h
F: include/uapi/linux/membarrier.h
F: kernel/sched/membarrier.c

2 changes: 2 additions & 0 deletions arch/arm/include/asm/pgtable.h
@@ -151,6 +151,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,

extern pgd_t swapper_pg_dir[PTRS_PER_PGD];

#define pgdp_get(pgdp) READ_ONCE(*pgdp)

#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))
#define pud_write(pud) pmd_write(__pmd(pud_val(pud)))

4 changes: 4 additions & 0 deletions arch/riscv/Kconfig
@@ -27,14 +27,18 @@ config RISCV
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MMIOWB
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
select ARCH_HAS_PREPARE_SYNC_CORE_CMD
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_DIRECT_MAP if MMU
select ARCH_HAS_SET_MEMORY if MMU
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
4 changes: 2 additions & 2 deletions arch/riscv/include/asm/kfence.h
@@ -18,9 +18,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
pte_t *pte = virt_to_kpte(addr);

if (protect)
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT));
else
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT));

flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

50 changes: 50 additions & 0 deletions arch/riscv/include/asm/membarrier.h
@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_RISCV_MEMBARRIER_H
#define _ASM_RISCV_MEMBARRIER_H

static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
{
/*
* Only need the full barrier when switching between processes.
* Barrier when switching from kernel to userspace is not
* required here, given that it is implied by mmdrop(). Barrier
* when switching from userspace to kernel is not needed after
* store to rq->curr.
*/
if (IS_ENABLED(CONFIG_SMP) &&
likely(!(atomic_read(&next->membarrier_state) &
(MEMBARRIER_STATE_PRIVATE_EXPEDITED |
MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
return;

/*
* The membarrier system call requires a full memory barrier
* after storing to rq->curr, before going back to user-space.
*
* This barrier is also needed for the SYNC_CORE command when
* switching between processes; in particular, on a transition
* from a thread belonging to another mm to a thread belonging
* to the mm for which a membarrier SYNC_CORE is done on CPU0:
*
* - [CPU0] sets all bits in the mm icache_stale_mask (in
* prepare_sync_core_cmd());
*
* - [CPU1] stores to rq->curr (by the scheduler);
*
* - [CPU0] loads rq->curr within membarrier and observes
* cpu_rq(1)->curr->mm != mm, so the IPI is skipped on
* CPU1; this means membarrier relies on switch_mm() to
* issue the sync-core;
*
* - [CPU1] switch_mm() loads icache_stale_mask; if the bit
* is zero, switch_mm() may incorrectly skip the sync-core.
*
* Matches a full barrier in the proximity of the membarrier
* system call entry.
*/
smp_mb();
}

#endif /* _ASM_RISCV_MEMBARRIER_H */
22 changes: 5 additions & 17 deletions arch/riscv/include/asm/pgtable-64.h
@@ -198,7 +198,7 @@ static inline int pud_user(pud_t pud)

static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
WRITE_ONCE(*pudp, pud);
}

static inline void pud_clear(pud_t *pudp)
@@ -274,7 +274,7 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
*p4dp = p4d;
WRITE_ONCE(*p4dp, p4d);
else
set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
}
@@ -336,18 +336,12 @@ static inline struct page *p4d_page(p4d_t p4d)
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

#define pud_offset pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
if (pgtable_l4_enabled)
return p4d_pgtable(*p4d) + pud_index(address);

return (pud_t *)p4d;
}
pud_t *pud_offset(p4d_t *p4d, unsigned long address);

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
if (pgtable_l5_enabled)
*pgdp = pgd;
WRITE_ONCE(*pgdp, pgd);
else
set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
}
@@ -400,12 +394,6 @@ static inline struct page *pgd_page(pgd_t pgd)
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

#define p4d_offset p4d_offset
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
if (pgtable_l5_enabled)
return pgd_pgtable(*pgd) + p4d_index(address);

return (p4d_t *)pgd;
}
p4d_t *p4d_offset(pgd_t *pgd, unsigned long address);

#endif /* _ASM_RISCV_PGTABLE_64_H */
33 changes: 8 additions & 25 deletions arch/riscv/include/asm/pgtable.h
@@ -248,7 +248,7 @@ static inline int pmd_leaf(pmd_t pmd)

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
WRITE_ONCE(*pmdp, pmd);
}

static inline void pmd_clear(pmd_t *pmdp)
@@ -515,7 +515,7 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
*/
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
WRITE_ONCE(*ptep, pteval);
}

void flush_icache_pte(pte_t pte);
@@ -549,19 +549,12 @@ static inline void pte_clear(struct mm_struct *mm,
__set_pte_at(ptep, __pte(0));
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
if (!pte_same(*ptep, entry))
__set_pte_at(ptep, entry);
/*
* update_mmu_cache will unconditionally execute, handling both
* the case that the PTE changed and the spurious fault case.
*/
return true;
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep, pte_t entry, int dirty);
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */
extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep);

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
@@ -574,16 +567,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return pte;
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pte_t *ptep)
{
if (!pte_young(*ptep))
return 0;
return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
29 changes: 29 additions & 0 deletions arch/riscv/include/asm/sync_core.h
@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_SYNC_CORE_H
#define _ASM_RISCV_SYNC_CORE_H

/*
* RISC-V implements return to user-space through an xRET instruction,
* which is not core serializing.
*/
static inline void sync_core_before_usermode(void)
{
asm volatile ("fence.i" ::: "memory");
}

#ifdef CONFIG_SMP
/*
* Ensure the next switch_mm() on every CPU issues a core serializing
* instruction for the given @mm.
*/
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
{
cpumask_setall(&mm->context.icache_stale_mask);
}
#else
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
{
}
#endif /* CONFIG_SMP */

#endif /* _ASM_RISCV_SYNC_CORE_H */
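prepare_sync_core_cmd() only marks the mm's CPUs as having a stale icache; the FENCE.I itself is deferred to the context-switch path, which tests and clears the bit. A simplified, illustrative sketch of that consumer side follows; the in-tree logic lives in the riscv switch_mm()/context-switch code, and the helper name here is made up, not the actual implementation:

/*
 * Illustrative consumer side of icache_stale_mask, as assumed by
 * prepare_sync_core_cmd(); simplified, not the in-tree code.
 */
static inline void sync_core_on_switch_mm(struct mm_struct *next,
					  unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &next->context.icache_stale_mask)) {
		cpumask_clear_cpu(cpu, &next->context.icache_stale_mask);
		/*
		 * FENCE.I core-serializes this hart, making instruction
		 * writes performed before the membarrier SYNC_CORE
		 * visible to instruction fetch here.
		 */
		local_flush_icache_all();
	}
}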
2 changes: 1 addition & 1 deletion arch/riscv/kernel/efi.c
@@ -60,7 +60,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
pte_t pte = READ_ONCE(*ptep);
pte_t pte = ptep_get(ptep);
unsigned long val;

if (md->attribute & EFI_MEMORY_RO) {