Skip to content

Commit 0786138

Browse files
tlendacky and suryasaimadhu
authored and committed
x86/sev-es: Add a Runtime #VC Exception Handler
Add the handlers for #VC exceptions invoked at runtime.

Signed-off-by: Tom Lendacky <[email protected]>
Signed-off-by: Joerg Roedel <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent a13644f commit 0786138

File tree

3 files changed

+255
-8
lines changed

3 files changed

+255
-8
lines changed

arch/x86/include/asm/idtentry.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ static __always_inline void __##func(struct pt_regs *regs)
318318
*/
319319
/*
 * Declare the C entry points for the #VC exception:
 *  - func (via DECLARE_IDTENTRY_RAW_ERRORCODE): top-level dispatcher
 *  - ist_##func:        variant used when already on the #VC fall-back stack
 *  - safe_stack_##func: variant used after switching to a safe kernel stack
 */
#define DECLARE_IDTENTRY_VC(vector, func)				\
	DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func);			\
	__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code);	\
	__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
322323

323324
/**
@@ -608,6 +609,11 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug);
608609
/* #DF */
609610
DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault);
610611

612+
/* #VC */
613+
#ifdef CONFIG_AMD_MEM_ENCRYPT
614+
DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
615+
#endif
616+
611617
#ifdef CONFIG_XEN_PV
612618
DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
613619
#endif

arch/x86/kernel/idt.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,11 +229,14 @@ static const __initconst struct idt_data early_pf_idts[] = {
229229
* cpu_init() when the TSS has been initialized.
230230
*/
231231
static const __initconst struct idt_data ist_idts[] = {
	ISTG(X86_TRAP_DB,	asm_exc_debug,			IST_INDEX_DB),
	ISTG(X86_TRAP_NMI,	asm_exc_nmi,			IST_INDEX_NMI),
	ISTG(X86_TRAP_DF,	asm_exc_double_fault,		IST_INDEX_DF),
#ifdef CONFIG_X86_MCE
	ISTG(X86_TRAP_MC,	asm_exc_machine_check,		IST_INDEX_MCE),
#endif
#ifdef CONFIG_AMD_MEM_ENCRYPT
	/* #VC (VMM Communication) handler gets its own IST stack */
	ISTG(X86_TRAP_VC,	asm_exc_vmm_communication,	IST_INDEX_VC),
#endif
};
239242

arch/x86/kernel/sev-es.c

Lines changed: 242 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77
* Author: Joerg Roedel <[email protected]>
88
*/
99

10+
#define pr_fmt(fmt) "SEV-ES: " fmt
11+
1012
#include <linux/sched/debug.h> /* For show_regs() */
1113
#include <linux/percpu-defs.h>
1214
#include <linux/mem_encrypt.h>
15+
#include <linux/lockdep.h>
1316
#include <linux/printk.h>
1417
#include <linux/mm_types.h>
1518
#include <linux/set_memory.h>
@@ -22,8 +25,8 @@
2225
#include <asm/insn-eval.h>
2326
#include <asm/fpu/internal.h>
2427
#include <asm/processor.h>
25-
#include <asm/trap_pf.h>
26-
#include <asm/trapnr.h>
28+
#include <asm/realmode.h>
29+
#include <asm/traps.h>
2730
#include <asm/svm.h>
2831

2932
/* For early boot hypervisor communication in SEV-ES enabled guests */
@@ -48,11 +51,43 @@ struct sev_es_runtime_data {
4851
* interrupted stack in the #VC entry code.
4952
*/
5053
char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
54+
55+
/*
56+
* Reserve one page per CPU as backup storage for the unencrypted GHCB.
57+
* It is needed when an NMI happens while the #VC handler uses the real
58+
* GHCB, and the NMI handler itself is causing another #VC exception. In
59+
* that case the GHCB content of the first handler needs to be backed up
60+
* and restored.
61+
*/
62+
struct ghcb backup_ghcb;
63+
64+
/*
65+
* Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
66+
* There is no need for it to be atomic, because nothing is written to
67+
* the GHCB between the read and the write of ghcb_active. So it is safe
68+
* to use it when a nested #VC exception happens before the write.
69+
*
70+
* This is necessary for example in the #VC->NMI->#VC case when the NMI
71+
* happens while the first #VC handler uses the GHCB. When the NMI code
72+
* raises a second #VC handler it might overwrite the contents of the
73+
* GHCB written by the first handler. To avoid this the content of the
74+
* GHCB is saved and restored when the GHCB is detected to be in use
75+
* already.
76+
*/
77+
bool ghcb_active;
78+
bool backup_ghcb_active;
79+
};
80+
81+
/*
 * Tracks which GHCB sev_es_get_ghcb() handed out: ghcb points at the
 * backup GHCB when the real one was already in use, NULL otherwise.
 */
struct ghcb_state {
	struct ghcb *ghcb;
};
5284

5385
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
5486
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
5587

88+
/* Needed in vc_early_forward_exception */
89+
void do_early_exception(struct pt_regs *regs, int trapnr);
90+
5691
static void __init setup_vc_stacks(int cpu)
5792
{
5893
struct sev_es_runtime_data *data;
@@ -123,8 +158,52 @@ void noinstr __sev_es_ist_exit(void)
123158
this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
124159
}
125160

126-
/* Needed in vc_early_forward_exception */
127-
void do_early_exception(struct pt_regs *regs, int trapnr);
161+
/*
 * Hand out the per-CPU GHCB for hypervisor communication.
 *
 * If the GHCB is already active (nested #VC, e.g. #VC->NMI->#VC), its
 * contents are saved into the per-CPU backup GHCB first, and state->ghcb
 * records where the backup lives so sev_es_put_ghcb() can restore it.
 * Returns NULL when both the GHCB and the backup are already in use -
 * the caller cannot make progress in that case.
 *
 * NOTE: backup_ghcb_active is set *before* the backup copy is written,
 * so a #VC nesting in between is detected - keep that ordering.
 */
static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active))
			return NULL;

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}
189+
190+
/*
 * Release the GHCB obtained via sev_es_get_ghcb().
 *
 * If state->ghcb is set, a nested handler is finishing: copy the saved
 * contents from the backup GHCB back into the real one and mark the
 * backup free. Otherwise just mark the per-CPU GHCB inactive.
 */
static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		data->ghcb_active = false;
	}
}
128207

129208
static inline u64 sev_es_rd_ghcb_msr(void)
130209
{
@@ -316,6 +395,9 @@ static void __init init_ghcb(int cpu)
316395
panic("Can't map GHCBs unencrypted");
317396

318397
memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
398+
399+
data->ghcb_active = false;
400+
data->backup_ghcb_active = false;
319401
}
320402

321403
void __init sev_es_init_vc_handling(void)
@@ -336,6 +418,9 @@ void __init sev_es_init_vc_handling(void)
336418
init_ghcb(cpu);
337419
setup_vc_stacks(cpu);
338420
}
421+
422+
/* Secondary CPUs use the runtime #VC handler */
423+
initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
339424
}
340425

341426
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
@@ -366,6 +451,159 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
366451
return result;
367452
}
368453

454+
/*
 * Re-inject an exception raised during #VC instruction emulation
 * (recorded in ctxt->fi) by invoking the corresponding native handler.
 *
 * Only #GP and #UD are handled here; any other vector indicates broken
 * emulation code, so BUG().
 */
static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	/* Make the error code visible to the forwarded handler via pt_regs */
	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}
473+
474+
/* Does the pt_regs frame live on this CPU's #VC fall-back (VC2) IST stack? */
static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
{
	unsigned long stack_bottom = __this_cpu_ist_bottom_va(VC2);
	unsigned long stack_top    = __this_cpu_ist_top_va(VC2);
	unsigned long frame        = (unsigned long)regs;

	return frame >= stack_bottom && frame < stack_top;
}
480+
481+
/*
482+
* Main #VC exception handler. It is called when the entry code was able to
483+
* switch off the IST to a safe kernel stack.
484+
*
485+
* With the current implementation it is always possible to switch to a safe
486+
* stack because #VC exceptions only happen at known places, like intercepted
487+
* instructions or accesses to MMIO areas/IO ports. They can also happen with
488+
* code instrumentation when the hypervisor intercepts #DB, but the critical
489+
* paths are forbidden to be instrumented, so #DB exceptions currently also
490+
* only happen in safe places.
491+
*/
492+
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
493+
{
494+
struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
495+
struct ghcb_state state;
496+
struct es_em_ctxt ctxt;
497+
enum es_result result;
498+
struct ghcb *ghcb;
499+
500+
lockdep_assert_irqs_disabled();
501+
instrumentation_begin();
502+
503+
/*
504+
* This is invoked through an interrupt gate, so IRQs are disabled. The
505+
* code below might walk page-tables for user or kernel addresses, so
506+
* keep the IRQs disabled to protect us against concurrent TLB flushes.
507+
*/
508+
509+
ghcb = sev_es_get_ghcb(&state);
510+
if (!ghcb) {
511+
/*
512+
* Mark GHCBs inactive so that panic() is able to print the
513+
* message.
514+
*/
515+
data->ghcb_active = false;
516+
data->backup_ghcb_active = false;
517+
518+
panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
519+
}
520+
521+
vc_ghcb_invalidate(ghcb);
522+
result = vc_init_em_ctxt(&ctxt, regs, error_code);
523+
524+
if (result == ES_OK)
525+
result = vc_handle_exitcode(&ctxt, ghcb, error_code);
526+
527+
sev_es_put_ghcb(&state);
528+
529+
/* Done - now check the result */
530+
switch (result) {
531+
case ES_OK:
532+
vc_finish_insn(&ctxt);
533+
break;
534+
case ES_UNSUPPORTED:
535+
pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
536+
error_code, regs->ip);
537+
goto fail;
538+
case ES_VMM_ERROR:
539+
pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
540+
error_code, regs->ip);
541+
goto fail;
542+
case ES_DECODE_FAILED:
543+
pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
544+
error_code, regs->ip);
545+
goto fail;
546+
case ES_EXCEPTION:
547+
vc_forward_exception(&ctxt);
548+
break;
549+
case ES_RETRY:
550+
/* Nothing to do */
551+
break;
552+
default:
553+
pr_emerg("Unknown result in %s():%d\n", __func__, result);
554+
/*
555+
* Emulating the instruction which caused the #VC exception
556+
* failed - can't continue so print debug information
557+
*/
558+
BUG();
559+
}
560+
561+
out:
562+
instrumentation_end();
563+
564+
return;
565+
566+
fail:
567+
if (user_mode(regs)) {
568+
/*
569+
* Do not kill the machine if user-space triggered the
570+
* exception. Send SIGBUS instead and let user-space deal with
571+
* it.
572+
*/
573+
force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
574+
} else {
575+
pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
576+
result);
577+
578+
/* Show some debug info */
579+
show_regs(regs);
580+
581+
/* Ask hypervisor to sev_es_terminate */
582+
sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
583+
584+
/* If that fails and we get here - just panic */
585+
panic("Returned from Terminate-Request to Hypervisor\n");
586+
}
587+
588+
goto out;
589+
}
590+
591+
/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
592+
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
{
	/*
	 * Reaching this handler means the #VC exception arrived while already
	 * on the fall-back stack (see DEFINE_IDTENTRY_VC dispatcher), i.e.
	 * from a context that cannot be handled - give up.
	 */
	instrumentation_begin();
	panic("Can't handle #VC exception from unsupported context\n");
	instrumentation_end();
}
598+
599+
DEFINE_IDTENTRY_VC(exc_vmm_communication)
600+
{
601+
if (likely(!on_vc_fallback_stack(regs)))
602+
safe_stack_exc_vmm_communication(regs, error_code);
603+
else
604+
ist_exc_vmm_communication(regs, error_code);
605+
}
606+
369607
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
370608
{
371609
unsigned long exit_code = regs->orig_ax;

0 commit comments

Comments
 (0)