From 0cdc91bb8c4bd6c1ab17ddb034754e04789ad06e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:33 +0000 Subject: [PATCH 1/8] KVM: arm64: Discard any SVE state when entering KVM guests Since 8383741ab2e773a99 (KVM: arm64: Get rid of host SVE tracking/saving) KVM has not tracked the host SVE state, relying on the fact that we currently disable SVE whenever we perform a syscall. This may not be true in future since performance optimisation may result in us keeping SVE enabled in order to avoid needing to take access traps to reenable it. Handle this by clearing TIF_SVE and converting the stored task state to FPSIMD format when preparing to run the guest. This is done with a new call fpsimd_kvm_prepare() to keep the direct state manipulation functions internal to fpsimd.c. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/kernel/fpsimd.c | 23 +++++++++++++++++++++++ arch/arm64/kvm/fpsimd.c | 2 ++ 3 files changed, 26 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index bec5f14b622ae..b13a902d74738 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -44,6 +44,7 @@ extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); +extern void fpsimd_kvm_prepare(void); extern void fpsimd_bind_task_to_cpu(void); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a9bbfb800ec2b..eb7774096dd6e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1096,6 +1096,29 @@ void fpsimd_signal_preserve_current_state(void) sve_to_fpsimd(current); } +/* + * Called by KVM when entering the guest. + */ +void fpsimd_kvm_prepare(void) +{ + if (!system_supports_sve()) + return; + + /* + * KVM does not save host SVE state since we can only enter + * the guest from a syscall so the ABI means that only the + * non-saved SVE state needs to be saved. If we have left + * SVE enabled for performance reasons then update the task + * state to be FPSIMD only. + */ + get_cpu_fpsimd_context(); + + if (test_and_clear_thread_flag(TIF_SVE)) + sve_to_fpsimd(current); + + put_cpu_fpsimd_context(); +} + /* * Associate current's FPSIMD context with this cpu * The caller must have ownership of the cpu FPSIMD context before calling diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3e081d556e810..98fab83647f35 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -67,6 +67,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) KVM_ARM64_HOST_SVE_ENABLED); vcpu->arch.flags |= KVM_ARM64_FP_HOST; + fpsimd_kvm_prepare(); + if (test_thread_flag(TIF_SVE)) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE; From a43af7f72d5467d60d0529698fe0466627f3798a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:34 +0000 Subject: [PATCH 2/8] arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE When we save the state for the floating point registers this can be done in the form visible through either the FPSIMD V registers or the SVE Z and P registers. At present we track which format is currently used based on TIF_SVE and the SME streaming mode state but particularly in the SVE case this limits our options for optimising things, especially around syscalls. Introduce a new enum which we place together with saved floating point state in both thread_struct and the KVM guest state which explicitly states which format is active and keep it up to date when we change it. At present we do not use this state except to verify that it has the expected value when loading the state, future patches will introduce functional changes. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 3 +- arch/arm64/include/asm/kvm_host.h | 12 +++ arch/arm64/include/asm/processor.h | 12 +++ arch/arm64/kernel/fpsimd.c | 117 +++++++++++++++++++---------- arch/arm64/kernel/process.c | 2 + arch/arm64/kernel/ptrace.c | 8 ++ arch/arm64/kernel/signal.c | 3 + arch/arm64/kvm/fpsimd.c | 3 +- 8 files changed, 120 insertions(+), 40 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index b13a902d74738..32e88828f89c4 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -48,7 +48,8 @@ extern void fpsimd_kvm_prepare(void); extern void fpsimd_bind_task_to_cpu(void); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl); + void *sve_state, unsigned int sve_vl, + enum fp_type *type); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 912b83e784bbf..1e496fff74184 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -283,7 +283,19 @@ struct vcpu_reset_state { struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; + + /* + * Guest floating point state + * + * The architecture has two main floating point extensions, + * the original FPSIMD and SVE. These have overlapping + * register views, with the FPSIMD V registers occupying the + * low 128 bits of the SVE Z registers. When the core + * floating point code saves the register state of a task it + * records which view it saved in fp_type. + */ void *sve_state; + enum fp_type fp_type; unsigned int sve_max_vl; /* Stage 2 paging state used by the hardware on next switch */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7c546c3487c9f..d0fb02c244bc0 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -111,6 +111,17 @@ struct debug_info { #endif }; +enum vec_type { + ARM64_VEC_SVE = 0, + ARM64_VEC_SME, + ARM64_VEC_MAX, +}; + +enum fp_type { + FP_STATE_FPSIMD, + FP_STATE_SVE, +}; + struct cpu_context { unsigned long x19; unsigned long x20; @@ -141,6 +152,7 @@ struct thread_struct { struct user_fpsimd_state fpsimd_state; } uw; + enum fp_type fp_type; /* registers FPSIMD or SVE? */ unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ unsigned int sve_vl; /* SVE vector length */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index eb7774096dd6e..a86aa585db05a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -117,6 +117,7 @@ struct fpsimd_last_state_struct { struct user_fpsimd_state *st; void *sve_state; unsigned int sve_vl; + enum fp_type *fp_type; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -241,14 +242,6 @@ static void sve_free(struct task_struct *task) * The task can execute SVE instructions while in userspace without * trapping to the kernel. * - * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the - * corresponding Zn), P0-P15 and FFR are encoded in in - * task->thread.sve_state, formatted appropriately for vector - * length task->thread.sve_vl. - * - * task->thread.sve_state must point to a valid buffer at least - * sve_state_size(task) bytes in size. - * * During any syscall, the kernel may optionally clear TIF_SVE and * discard the vector state except for the FPSIMD subset. * @@ -258,7 +251,15 @@ static void sve_free(struct task_struct *task) * do_sve_acc() to be called, which does some preparation and then * sets TIF_SVE. * - * When stored, FPSIMD registers V0-V31 are encoded in + * During any syscall, the kernel may optionally clear TIF_SVE and + * discard the vector state except for the FPSIMD subset. + * + * The data will be stored in one of two formats: + * + * * FPSIMD only - FP_STATE_FPSIMD: + * + * When the FPSIMD only state stored task->thread.fp_type is set to + * FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are * logically zero but not stored anywhere; P0-P15 and FFR are not * stored and have unspecified values from userspace's point of @@ -268,6 +269,19 @@ static void sve_free(struct task_struct *task) * task->thread.sve_state does not need to be non-NULL, valid or any * particular size: it must not be dereferenced. * + * * SVE state - FP_STATE_SVE: + * + * When the full SVE state is stored task->thread.fp_type is set to + * FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the + * corresponding Zn), P0-P15 and FFR are encoded in in + * task->thread.sve_state, formatted appropriately for vector + * length task->thread.sve_vl or, if SVCR.SM is set, + * task->thread.sme_vl. The storage for the vector registers in + * task->thread.uw.fpsimd_state should be ignored. + * + * task->thread.sve_state must point to a valid buffer at least + * sve_state_size(task) bytes in size. + * * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state * irrespective of whether TIF_SVE is clear or set, since these are * not vector length dependent. @@ -282,15 +296,24 @@ static void sve_free(struct task_struct *task) */ static void task_fpsimd_load(void) { + bool restore_sve_regs = false; WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) - sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, - sve_vq_from_vl(current->thread.sve_vl) - 1); - else - fpsimd_load_state(¤t->thread.uw.fpsimd_state); + /* Check if we should restore SVE first */ + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { + restore_sve_regs = true; + } + + if (restore_sve_regs) { + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE); + sve_load_state(sve_pffr(¤t->thread), + ¤t->thread.uw.fpsimd_state.fpsr, + sve_vq_from_vl(current->thread.sve_vl) - 1); + } else { + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD); + fpsimd_load_state(¤t->thread.uw.fpsimd_state); + } } /* @@ -301,28 +324,37 @@ static void fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); - /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + bool save_sve_regs = false; + unsigned int vl; + /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + + WARN_ON(!system_supports_fpsimd()); + WARN_ON(!have_cpu_fpsimd_context()); + + if (system_supports_sve() && test_thread_flag(TIF_SVE)) { + save_sve_regs = true; + vl = last->sve_vl; + } - WARN_ON(!system_supports_fpsimd()); - WARN_ON(!have_cpu_fpsimd_context()); + if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { + /* Get the configured VL from RDVL, will account for SM */ + if (WARN_ON(sve_get_vl() != vl)) { + /* + * Can't save the user regs, so current would + * re-enter user with corrupt state. + * There's no way to recover, so kill it: + */ + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); + return; + } - if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { - /* - * Can't save the user regs, so current would - * re-enter user with corrupt state. - * There's no way to recover, so kill it: - */ - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); - return; - } - - sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr); - } else - fpsimd_save_state(last->st); + sve_save_state((char *)last->sve_state + + sve_ffr_offset(last->sve_vl), + &last->st->fpsr); + *last->fp_type = FP_STATE_SVE; + } else { + fpsimd_save_state(last->st); + *last->fp_type = FP_STATE_FPSIMD; } } @@ -627,8 +659,10 @@ int sve_set_vector_length(struct task_struct *task, } fpsimd_flush_task_state(task); - if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) + if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) { sve_to_fpsimd(task); + task->thread.fp_type = FP_STATE_FPSIMD; + } if (task == current) put_cpu_fpsimd_context(); @@ -1067,6 +1101,8 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } + current->thread.fp_type = FP_STATE_FPSIMD; + put_cpu_fpsimd_context(); } @@ -1113,8 +1149,10 @@ void fpsimd_kvm_prepare(void) */ get_cpu_fpsimd_context(); - if (test_and_clear_thread_flag(TIF_SVE)) + if (test_and_clear_thread_flag(TIF_SVE)) { sve_to_fpsimd(current); + current->thread.fp_type = FP_STATE_FPSIMD; + } put_cpu_fpsimd_context(); } @@ -1133,6 +1171,7 @@ void fpsimd_bind_task_to_cpu(void) last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; + last->fp_type = ¤t->thread.fp_type; current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { @@ -1147,7 +1186,8 @@ void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl) + unsigned int sve_vl, + enum fp_type *type) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1158,6 +1198,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, last->st = st; last->sve_state = sve_state; last->sve_vl = sve_vl; + last->fp_type = type; } /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f95dfc22d679e..8b80d41fc6d20 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -372,6 +372,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + dst->thread.fp_type = FP_STATE_FPSIMD; + /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2817e39881fee..e762a7e608cff 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -831,6 +831,7 @@ static int sve_set(struct task_struct *target, ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_FPSIMD; goto out; } @@ -847,6 +848,12 @@ static int sve_set(struct task_struct *target, } sve_alloc(target); + if (!target->thread.sve_state) { + ret = -ENOMEM; + clear_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_FPSIMD; + goto out; + } /* * Ensure target->thread.sve_state is up to date with target's @@ -855,6 +862,7 @@ static int sve_set(struct task_struct *target, */ fpsimd_sync_to_sve(target); set_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_SVE; BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); start = SVE_PT_SVE_OFFSET; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 23f0a3c61f904..695835393cd65 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -208,6 +208,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); clear_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_FPSIMD; /* load the hardware registers from the fpsimd_state structure */ if (!err) @@ -272,6 +273,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (sve.head.size <= sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_FPSIMD; goto fpsimd_only; } @@ -299,6 +301,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) return -EFAULT; set_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_SVE; fpsimd_only: /* copy the FP and status/control registers */ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 98fab83647f35..3409ba1b5eff2 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -89,7 +89,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, - vcpu->arch.sve_max_vl); + vcpu->arch.sve_max_vl, + &vcpu->arch.fp_type); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); From 251944f1af4a8f77c6ed3cb4ef3039e03f6ebf3a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:35 +0000 Subject: [PATCH 3/8] arm64/fpsimd: Have KVM explicitly say which FP registers to save In order to avoid needlessly saving and restoring the guest registers KVM relies on the host FPSMID code to save the guest registers when we context switch away from the guest. This is done by binding the KVM guest state to the CPU on top of the task state that was originally there, then carefully managing the TIF_SVE flag for the task to cause the host to save the full SVE state when needed regardless of the needs of the host task. This works well enough but isn't terribly direct about what is going on and makes it much more complicated to try to optimise what we're doing with the SVE register state. Let's instead have KVM pass in the register state it wants saving when it binds to the CPU. We introduce a new FP_STATE_CURRENT for use during normal task binding to indicate that we should base our decisions on the current task. This should not be used when actually saving. Ideally we might want to use a separate enum for the type to save but this enum and the enum values would then need to be named which has problems with clarity and ambiguity. In order to ease any future debugging that might be required this patch does not actually update any of the decision making about what to save, it merely starts tracking the new information and warns if the requested state is not what we would otherwise have decided to save. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 +- arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/fpsimd.c | 27 ++++++++++++++++++++++++--- arch/arm64/kvm/fpsimd.c | 9 ++++++++- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 32e88828f89c4..80b5f7f23b8fe 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -49,7 +49,7 @@ extern void fpsimd_kvm_prepare(void); extern void fpsimd_bind_task_to_cpu(void); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, - enum fp_type *type); + enum fp_type *type, enum fp_type to_save); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index d0fb02c244bc0..897d19948c40c 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -118,6 +118,7 @@ enum vec_type { }; enum fp_type { + FP_STATE_CURRENT, /* Save based on current task state. */ FP_STATE_FPSIMD, FP_STATE_SVE, }; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a86aa585db05a..46dedd9f56143 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -118,6 +118,7 @@ struct fpsimd_last_state_struct { void *sve_state; unsigned int sve_vl; enum fp_type *fp_type; + enum fp_type to_save; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -267,7 +268,8 @@ static void sve_free(struct task_struct *task) * but userspace is discouraged from relying on this. * * task->thread.sve_state does not need to be non-NULL, valid or any - * particular size: it must not be dereferenced. + * particular size: it must not be dereferenced and any data stored + * there should be considered stale and not referenced. * * * SVE state - FP_STATE_SVE: * @@ -280,7 +282,9 @@ static void sve_free(struct task_struct *task) * task->thread.uw.fpsimd_state should be ignored. * * task->thread.sve_state must point to a valid buffer at least - * sve_state_size(task) bytes in size. + * sve_state_size(task) bytes in size. The data stored in + * task->thread.uw.fpsimd_state.vregs should be considered stale + * and not referenced. * * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state * irrespective of whether TIF_SVE is clear or set, since these are @@ -336,6 +340,21 @@ static void fpsimd_save(void) vl = last->sve_vl; } + /* + * Validate that an explicitly specified state to save is + * consistent with the task state. + */ + switch (last->to_save) { + case FP_STATE_CURRENT: + break; + case FP_STATE_FPSIMD: + WARN_ON_ONCE(save_sve_regs); + break; + case FP_STATE_SVE: + WARN_ON_ONCE(!save_sve_regs); + break; + } + if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { /* Get the configured VL from RDVL, will account for SM */ if (WARN_ON(sve_get_vl() != vl)) { @@ -1172,6 +1191,7 @@ void fpsimd_bind_task_to_cpu(void) last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; last->fp_type = ¤t->thread.fp_type; + last->to_save = FP_STATE_CURRENT; current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { @@ -1187,7 +1207,7 @@ void fpsimd_bind_task_to_cpu(void) void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, unsigned int sve_vl, - enum fp_type *type) + enum fp_type *type, enum fp_type to_save) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1199,6 +1219,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, last->sve_state = sve_state; last->sve_vl = sve_vl; last->fp_type = type; + last->to_save = to_save; } /* diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3409ba1b5eff2..48ddf6dc92e57 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -84,13 +84,20 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) */ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) { + enum fp_type to_save; + WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + if (vcpu_has_sve(vcpu)) + to_save = FP_STATE_SVE; + else + to_save = FP_STATE_FPSIMD; + fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl, - &vcpu->arch.fp_type); + &vcpu->arch.fp_type,to_save); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); From cbf835f8d27efe1fa8affbde6bbf855ce6638409 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:36 +0000 Subject: [PATCH 4/8] arm64/fpsimd: Stop using TIF_SVE to manage register saving in KVM Now that we are explicitly telling the host FP code which register state it needs to save we can remove the manipulation of TIF_SVE from the KVM code, simplifying it and allowing us to optimise our handling of normal tasks. Remove the manipulation of TIF_SVE from KVM and instead rely on to_save to ensure we save the correct data for it. There should be no functional or performance impact from this change. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 35 +++++++++++++++-------------------- arch/arm64/kvm/fpsimd.c | 1 - 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 46dedd9f56143..d0dcac17bb6b4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -322,7 +322,13 @@ static void task_fpsimd_load(void) /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to - * date with respect to the CPU registers. + * date with respect to the CPU registers. Note carefully that the + * current context is the context last bound to the CPU stored in + * last, if KVM is involved this may be the guest VM context rather + * than the host thread for the VM pointed to by current. This means + * that we must always reference the state storage via last rather + * than via current, if we are saving KVM state then it will have + * ensured that the type of registers to save is set in last->to_save. */ static void fpsimd_save(void) { @@ -332,29 +338,18 @@ static void fpsimd_save(void) unsigned int vl; /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ - WARN_ON(!system_supports_fpsimd()); - WARN_ON(!have_cpu_fpsimd_context()); - - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { + WARN_ON(!system_supports_fpsimd()); + WARN_ON(!have_cpu_fpsimd_context()); + + if (test_thread_flag(TIF_FOREIGN_FPSTATE)) + return; + + if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) || + last->to_save == FP_STATE_SVE) { save_sve_regs = true; vl = last->sve_vl; } - /* - * Validate that an explicitly specified state to save is - * consistent with the task state. - */ - switch (last->to_save) { - case FP_STATE_CURRENT: - break; - case FP_STATE_FPSIMD: - WARN_ON_ONCE(save_sve_regs); - break; - case FP_STATE_SVE: - WARN_ON_ONCE(!save_sve_regs); - break; - } - if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { /* Get the configured VL from RDVL, will account for SM */ if (WARN_ON(sve_get_vl() != vl)) { diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 48ddf6dc92e57..fa2972d400c74 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -100,7 +100,6 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) &vcpu->arch.fp_type,to_save); clear_thread_flag(TIF_FOREIGN_FPSTATE); - update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); } } From 0638869ca455eb1537cf171f1b53b6d5784622af Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:37 +0000 Subject: [PATCH 5/8] arm64/fpsimd: Load FP state based on recorded data type Now that we are recording the type of floating point register state we are saving when we write the register state out to memory we can use that information when we load from memory to decide which format to load, bringing TIF_SVE into line with what we saved rather than relying on TIF_SVE to determine what to load. The SME state details are already recorded directly in the saved SVCR and handled based on the information there. Since we are not changing any of the save paths there should be no functional change from this patch, further patches will make use of this to optimise and clarify the code. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-6-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d0dcac17bb6b4..11cfeb6ae4752 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -305,8 +305,31 @@ static void task_fpsimd_load(void) WARN_ON(!have_cpu_fpsimd_context()); /* Check if we should restore SVE first */ - if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { - restore_sve_regs = true; + if (system_supports_sve()) { + switch (current->thread.fp_type) { + case FP_STATE_FPSIMD: + /* Stop tracking SVE for this task until next use. */ + if (test_and_clear_thread_flag(TIF_SVE)) + sve_user_disable(); + break; + case FP_STATE_SVE: + if (!WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE))) + sve_user_enable(); + restore_sve_regs = true; + break; + default: + /* + * This indicates either a bug in + * fpsimd_save() or memory corruption, we + * should always record an explicit format + * when we save. We always at least have the + * memory allocated for FPSMID registers so + * try that and hope for the best. + */ + WARN_ON_ONCE(1); + clear_thread_flag(TIF_SVE); + break; + } } if (restore_sve_regs) { From de129c6571039c414cf04eed86e57c0c79bff6dd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:38 +0000 Subject: [PATCH 6/8] arm64/fpsimd: SME no longer requires SVE register state Now that we track the type of the stored register state separately to what is active in the task, it is valid to have the FPSIMD register state stored while in streaming mode. Remove the special case handling for SME when setting FPSIMD register state. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-7-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 11cfeb6ae4752..e2f666e666bc8 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -619,7 +619,7 @@ void fpsimd_sync_to_sve(struct task_struct *task) */ void sve_sync_to_fpsimd(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_SVE)) + if (task->thread.fp_type == FP_STATE_SVE) sve_to_fpsimd(task); } From 5252f329ca4b3ced3504d4c20195bae41c57ceb8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:39 +0000 Subject: [PATCH 7/8] arm64/sve: Leave SVE enabled on syscall if we don't context switch The syscall ABI says that the SVE register state not shared with FPSIMD may not be preserved on syscall, and this is the only mechanism we have in the ABI to stop tracking the extra SVE state for a process. Currently we do this unconditionally by means of disabling SVE for the process on syscall, causing userspace to take a trap to EL1 if it uses SVE again. These extra traps result in a noticeable overhead for using SVE instead of FPSIMD in some workloads, especially for simple syscalls where we can return directly to userspace and would not otherwise need to update the floating point registers. Tests with fp-pidbench show an approximately 70% overhead on a range of implementations when SVE is in use - while this is an extreme and entirely artificial benchmark it is clear that there is some useful room for improvement here. Now that we have the ability to track the decision about what to save seprately to TIF_SVE we can improve things by leaving TIF_SVE enabled on syscall but only saving the FPSIMD registers if we are in a syscall. This means that if we need to restore the register state from memory (eg, after a context switch or kernel mode NEON) we will drop TIF_SVE and reenable traps for userspace but if we can just return to userspace then traps will remain disabled. Since our current implementation and hence ABI has the effect of zeroing all the SVE register state not shared with FPSIMD on syscall we replace the disabling of TIF_SVE with a flush of the non-shared register state, this means that there is still some overhead for syscalls when SVE is in use but it is very much reduced. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-8-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 8 +++++++- arch/arm64/kernel/syscall.c | 13 +++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e2f666e666bc8..375f066e78ae3 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -367,7 +367,13 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; - if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) || + /* + * If a task is in a syscall the ABI allows us to only + * preserve the state shared with FPSIMD so don't bother + * saving the full SVE state in that case. + */ + if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) && + !in_syscall(current_pt_regs())) || last->to_save == FP_STATE_SVE) { save_sve_regs = true; vl = last->sve_vl; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index befde0eaa5e79..fcd1a57ff0869 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -176,16 +176,9 @@ static inline void sve_user_discard(void) if (!system_supports_sve()) return; - clear_thread_flag(TIF_SVE); - - /* - * task_fpsimd_load() won't be called to update CPACR_EL1 in - * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only - * happens if a context switch or kernel_neon_begin() or context - * modification (sigreturn, ptrace) intervenes. - * So, ensure that CPACR_EL1 is already correct for the fast-path case. - */ - sve_user_disable(); + if (test_thread_flag(TIF_SVE)) { + sve_flush_live(); + } } void do_el0_svc(struct pt_regs *regs) From 970809267cfda70fdecfeffacc94d2150827e0e9 Mon Sep 17 00:00:00 2001 From: RamaMalladiAWS Date: Mon, 31 Jul 2023 15:56:30 +0000 Subject: [PATCH 8/8] Code change for "arm64/sve: Don't zero non-FPSIMD register state on syscall by default" https://lore.kernel.org/linux-arm-kernel/20220620124158.482039-6-broonie@kernel.org/T/#m4951d967db2b2091769db05d2ffc29ddf319b62b Other code changes for successfully merging this SVE patch fix patch to the kernel 5.10. --- arch/arm64/kernel/fpsimd.c | 7 ++++--- arch/arm64/kernel/syscall.c | 36 +++++++++++++++++++++++++++++++++++- arch/arm64/kvm/fpsimd.c | 3 ++- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 375f066e78ae3..7d65a6b3766c9 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -374,7 +374,7 @@ static void fpsimd_save(void) */ if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) && !in_syscall(current_pt_regs())) || - last->to_save == FP_STATE_SVE) { + last->to_save == FP_STATE_SVE) { save_sve_regs = true; vl = last->sve_vl; } @@ -392,8 +392,8 @@ static void fpsimd_save(void) } sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr); + sve_ffr_offset(last->sve_vl), + &last->st->fpsr); *last->fp_type = FP_STATE_SVE; } else { fpsimd_save_state(last->st); @@ -1029,6 +1029,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) fpsimd_flush_task_state(current); fpsimd_to_sve(current); + current->thread.fp_type = FP_STATE_SVE; if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index fcd1a57ff0869..09f7b84c851a5 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -171,12 +171,46 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, syscall_trace_exit(regs); } +static unsigned int sve_syscall_regs_clear; + +#ifdef CONFIG_ARM64_SVE +/* + * Global sysctl to control if we force the SVE register state not + * shared with FPSIMD to be cleared on every syscall. If this is not + * enabled then we will leave the state unchanged unless we need to + * reload from memory (eg, after a context switch). + */ + +static struct ctl_table sve_syscall_sysctl_table[] = { + { + .procname = "sve_syscall_clear_regs", + .mode = 0644, + .data = &sve_syscall_regs_clear, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static int __init sve_syscall_sysctl_init(void) +{ + if (!register_sysctl("abi", sve_syscall_sysctl_table)) + return -EINVAL; + return 0; +} + +core_initcall(sve_syscall_sysctl_init); +#endif /* CONFIG_ARM64_SVE */ + static inline void sve_user_discard(void) { if (!system_supports_sve()) return; - if (test_thread_flag(TIF_SVE)) { + if (sve_syscall_regs_clear && test_thread_flag(TIF_SVE)) { + sve_user_disable(); sve_flush_live(); } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index fa2972d400c74..4890d0917404d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -97,9 +97,10 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl, - &vcpu->arch.fp_type,to_save); + &vcpu->arch.fp_type, to_save); clear_thread_flag(TIF_FOREIGN_FPSTATE); + update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); } }