| From 596356e37976739ea09d0be50cc45efe320a3f89 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Fri, 18 Jun 2021 13:54:09 +0200 |
| Subject: x86/sev: Split up runtime #VC handler for correct state tracking |
| |
| From: Joerg Roedel <jroedel@suse.de> |
| |
| [ Upstream commit be1a5408868af341f61f93c191b5e346ee88c82a ] |
| |
| Split up the #VC handler code into a from-user and a from-kernel part. |
| This allows clean and correct state tracking, as the #VC handler needs |
| to enter NMI state when raised from kernel mode and plain IRQ state when |
| raised from user mode. |
| |
| Fixes: 62441a1fb532 ("x86/sev-es: Correctly track IRQ states in runtime #VC handler") |
| Suggested-by: Peter Zijlstra <peterz@infradead.org> |
| Signed-off-by: Joerg Roedel <jroedel@suse.de> |
| Signed-off-by: Borislav Petkov <bp@suse.de> |
| Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Link: https://lkml.kernel.org/r/20210618115409.22735-3-joro@8bytes.org |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| arch/x86/entry/entry_64.S | 4 +- |
| arch/x86/include/asm/idtentry.h | 29 +++---- |
| arch/x86/kernel/sev-es.c | 148 +++++++++++++++++--------------- |
| 3 files changed, 91 insertions(+), 90 deletions(-) |
| |
| diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
| index cad08703c4ad..f18f3932e971 100644 |
| --- a/arch/x86/entry/entry_64.S |
| +++ b/arch/x86/entry/entry_64.S |
| @@ -508,7 +508,7 @@ SYM_CODE_START(\asmsym) |
| |
| movq %rsp, %rdi /* pt_regs pointer */ |
| |
| - call \cfunc |
| + call kernel_\cfunc |
| |
| /* |
| * No need to switch back to the IST stack. The current stack is either |
| @@ -519,7 +519,7 @@ SYM_CODE_START(\asmsym) |
| |
| /* Switch to the regular task stack */ |
| .Lfrom_usermode_switch_stack_\@: |
| - idtentry_body safe_stack_\cfunc, has_error_code=1 |
| + idtentry_body user_\cfunc, has_error_code=1 |
| |
| _ASM_NOKPROBE(\asmsym) |
| SYM_CODE_END(\asmsym) |
| diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h |
| index 0e3325790f3a..dc2a8b1657f4 100644 |
| --- a/arch/x86/include/asm/idtentry.h |
| +++ b/arch/x86/include/asm/idtentry.h |
| @@ -315,8 +315,8 @@ static __always_inline void __##func(struct pt_regs *regs) |
| */ |
| #define DECLARE_IDTENTRY_VC(vector, func) \ |
| DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \ |
| - __visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \ |
| - __visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code) |
| + __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \ |
| + __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code) |
| |
| /** |
| * DEFINE_IDTENTRY_IST - Emit code for IST entry points |
| @@ -358,33 +358,24 @@ static __always_inline void __##func(struct pt_regs *regs) |
| DEFINE_IDTENTRY_RAW_ERRORCODE(func) |
| |
| /** |
| - * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler |
| - which runs on a safe stack. |
| + * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler |
| + when raised from kernel mode |
| * @func: Function name of the entry point |
| * |
| * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE |
| */ |
| -#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \ |
| - DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func) |
| +#define DEFINE_IDTENTRY_VC_KERNEL(func) \ |
| + DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func) |
| |
| /** |
| - * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler |
| - which runs on the VC fall-back stack |
| + * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler |
| + when raised from user mode |
| * @func: Function name of the entry point |
| * |
| * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE |
| */ |
| -#define DEFINE_IDTENTRY_VC_IST(func) \ |
| - DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func) |
| - |
| -/** |
| - * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler |
| - * @func: Function name of the entry point |
| - * |
| - * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE |
| - */ |
| -#define DEFINE_IDTENTRY_VC(func) \ |
| - DEFINE_IDTENTRY_RAW_ERRORCODE(func) |
| +#define DEFINE_IDTENTRY_VC_USER(func) \ |
| + DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func) |
| |
| #else /* CONFIG_X86_64 */ |
| |
| diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c |
| index 0b5e35a51804..f3202b2e3c15 100644 |
| --- a/arch/x86/kernel/sev-es.c |
| +++ b/arch/x86/kernel/sev-es.c |
| @@ -759,7 +759,7 @@ void __init sev_es_init_vc_handling(void) |
| sev_es_setup_play_dead(); |
| |
| /* Secondary CPUs use the runtime #VC handler */ |
| - initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication; |
| + initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; |
| } |
| |
| static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) |
| @@ -1197,14 +1197,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, |
| return ES_EXCEPTION; |
| } |
| |
| -static __always_inline void vc_handle_trap_db(struct pt_regs *regs) |
| -{ |
| - if (user_mode(regs)) |
| - noist_exc_debug(regs); |
| - else |
| - exc_debug(regs); |
| -} |
| - |
| static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, |
| struct ghcb *ghcb, |
| unsigned long exit_code) |
| @@ -1300,41 +1292,13 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) |
| return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); |
| } |
| |
| -/* |
| - * Main #VC exception handler. It is called when the entry code was able to |
| - * switch off the IST to a safe kernel stack. |
| - * |
| - * With the current implementation it is always possible to switch to a safe |
| - * stack because #VC exceptions only happen at known places, like intercepted |
| - * instructions or accesses to MMIO areas/IO ports. They can also happen with |
| - * code instrumentation when the hypervisor intercepts #DB, but the critical |
| - * paths are forbidden to be instrumented, so #DB exceptions currently also |
| - * only happen in safe places. |
| - */ |
| -DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) |
| +static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) |
| { |
| - irqentry_state_t irq_state; |
| struct ghcb_state state; |
| struct es_em_ctxt ctxt; |
| enum es_result result; |
| struct ghcb *ghcb; |
| - |
| - /* |
| - * Handle #DB before calling into !noinstr code to avoid recursive #DB. |
| - */ |
| - if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) { |
| - vc_handle_trap_db(regs); |
| - return; |
| - } |
| - |
| - irq_state = irqentry_nmi_enter(regs); |
| - instrumentation_begin(); |
| - |
| - /* |
| - * This is invoked through an interrupt gate, so IRQs are disabled. The |
| - * code below might walk page-tables for user or kernel addresses, so |
| - * keep the IRQs disabled to protect us against concurrent TLB flushes. |
| - */ |
| + bool ret = true; |
| |
| ghcb = __sev_get_ghcb(&state); |
| |
| @@ -1354,15 +1318,18 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) |
| case ES_UNSUPPORTED: |
| pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", |
| error_code, regs->ip); |
| - goto fail; |
| + ret = false; |
| + break; |
| case ES_VMM_ERROR: |
| pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", |
| error_code, regs->ip); |
| - goto fail; |
| + ret = false; |
| + break; |
| case ES_DECODE_FAILED: |
| pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", |
| error_code, regs->ip); |
| - goto fail; |
| + ret = false; |
| + break; |
| case ES_EXCEPTION: |
| vc_forward_exception(&ctxt); |
| break; |
| @@ -1378,24 +1345,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) |
| BUG(); |
| } |
| |
| -out: |
| - instrumentation_end(); |
| - irqentry_nmi_exit(regs, irq_state); |
| + return ret; |
| +} |
| |
| - return; |
| +static __always_inline bool vc_is_db(unsigned long error_code) |
| +{ |
| + return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; |
| +} |
| |
| -fail: |
| - if (user_mode(regs)) { |
| - /* |
| - * Do not kill the machine if user-space triggered the |
| - * exception. Send SIGBUS instead and let user-space deal with |
| - * it. |
| - */ |
| - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); |
| - } else { |
| - pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n", |
| - result); |
| +/* |
| + * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode |
| + * and will panic when an error happens. |
| + */ |
| +DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) |
| +{ |
| + irqentry_state_t irq_state; |
| + |
| + /* |
| + * With the current implementation it is always possible to switch to a |
| + * safe stack because #VC exceptions only happen at known places, like |
| + * intercepted instructions or accesses to MMIO areas/IO ports. They can |
| + * also happen with code instrumentation when the hypervisor intercepts |
| + * #DB, but the critical paths are forbidden to be instrumented, so #DB |
| + * exceptions currently also only happen in safe places. |
| + * |
| + * But keep this here in case the noinstr annotations are violated due |
| + * to bug elsewhere. |
| + */ |
| + if (unlikely(on_vc_fallback_stack(regs))) { |
| + instrumentation_begin(); |
| + panic("Can't handle #VC exception from unsupported context\n"); |
| + instrumentation_end(); |
| + } |
| + |
| + /* |
| + * Handle #DB before calling into !noinstr code to avoid recursive #DB. |
| + */ |
| + if (vc_is_db(error_code)) { |
| + exc_debug(regs); |
| + return; |
| + } |
| + |
| + irq_state = irqentry_nmi_enter(regs); |
| |
| + instrumentation_begin(); |
| + |
| + if (!vc_raw_handle_exception(regs, error_code)) { |
| /* Show some debug info */ |
| show_regs(regs); |
| |
| @@ -1406,23 +1401,38 @@ fail: |
| panic("Returned from Terminate-Request to Hypervisor\n"); |
| } |
| |
| - goto out; |
| + instrumentation_end(); |
| + irqentry_nmi_exit(regs, irq_state); |
| } |
| |
| -/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */ |
| -DEFINE_IDTENTRY_VC_IST(exc_vmm_communication) |
| +/* |
| + * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode |
| + * and will kill the current task with SIGBUS when an error happens. |
| + */ |
| +DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) |
| { |
| + /* |
| + * Handle #DB before calling into !noinstr code to avoid recursive #DB. |
| + */ |
| + if (vc_is_db(error_code)) { |
| + noist_exc_debug(regs); |
| + return; |
| + } |
| + |
| + irqentry_enter_from_user_mode(regs); |
| instrumentation_begin(); |
| - panic("Can't handle #VC exception from unsupported context\n"); |
| - instrumentation_end(); |
| -} |
| |
| -DEFINE_IDTENTRY_VC(exc_vmm_communication) |
| -{ |
| - if (likely(!on_vc_fallback_stack(regs))) |
| - safe_stack_exc_vmm_communication(regs, error_code); |
| - else |
| - ist_exc_vmm_communication(regs, error_code); |
| + if (!vc_raw_handle_exception(regs, error_code)) { |
| + /* |
| + * Do not kill the machine if user-space triggered the |
| + * exception. Send SIGBUS instead and let user-space deal with |
| + * it. |
| + */ |
| + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); |
| + } |
| + |
| + instrumentation_end(); |
| + irqentry_exit_to_user_mode(regs); |
| } |
| |
| bool __init handle_vc_boot_ghcb(struct pt_regs *regs) |
| -- |
| 2.30.2 |
| |