7
7
* Author: Joerg Roedel <[email protected] >
8
8
*/
9
9
10
+ #define pr_fmt (fmt ) "SEV-ES: " fmt
11
+
10
12
#include <linux/sched/debug.h> /* For show_regs() */
11
13
#include <linux/percpu-defs.h>
12
14
#include <linux/mem_encrypt.h>
15
+ #include <linux/lockdep.h>
13
16
#include <linux/printk.h>
14
17
#include <linux/mm_types.h>
15
18
#include <linux/set_memory.h>
22
25
#include <asm/insn-eval.h>
23
26
#include <asm/fpu/internal.h>
24
27
#include <asm/processor.h>
25
- #include <asm/trap_pf .h>
26
- #include <asm/trapnr .h>
28
+ #include <asm/realmode .h>
29
+ #include <asm/traps .h>
27
30
#include <asm/svm.h>
28
31
29
32
/* For early boot hypervisor communication in SEV-ES enabled guests */
@@ -48,11 +51,43 @@ struct sev_es_runtime_data {
48
51
* interrupted stack in the #VC entry code.
49
52
*/
50
53
char fallback_stack [EXCEPTION_STKSZ ] __aligned (PAGE_SIZE );
54
+
55
+ /*
56
+ * Reserve one page per CPU as backup storage for the unencrypted GHCB.
57
+ * It is needed when an NMI happens while the #VC handler uses the real
58
+ * GHCB, and the NMI handler itself is causing another #VC exception. In
59
+ * that case the GHCB content of the first handler needs to be backed up
60
+ * and restored.
61
+ */
62
+ struct ghcb backup_ghcb ;
63
+
64
+ /*
65
+ * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
66
+ * There is no need for it to be atomic, because nothing is written to
67
+ * the GHCB between the read and the write of ghcb_active. So it is safe
68
+ * to use it when a nested #VC exception happens before the write.
69
+ *
70
+ * This is necessary for example in the #VC->NMI->#VC case when the NMI
71
+ * happens while the first #VC handler uses the GHCB. When the NMI code
72
+ * raises a second #VC handler it might overwrite the contents of the
73
+ * GHCB written by the first handler. To avoid this the content of the
74
+ * GHCB is saved and restored when the GHCB is detected to be in use
75
+ * already.
76
+ */
77
+ bool ghcb_active ;
78
+ bool backup_ghcb_active ;
79
+ };
80
+
81
/*
 * Per-invocation bookkeeping for sev_es_get_ghcb()/sev_es_put_ghcb():
 * ->ghcb points at the backup GHCB when a nested #VC had to save the
 * real one, and is NULL when the caller owns the primary GHCB page.
 */
struct ghcb_state {
	struct ghcb *ghcb;
};
52
84
53
85
static DEFINE_PER_CPU (struct sev_es_runtime_data * , runtime_data );
54
86
DEFINE_STATIC_KEY_FALSE (sev_es_enable_key );
55
87
88
+ /* Needed in vc_early_forward_exception */
89
+ void do_early_exception (struct pt_regs * regs , int trapnr );
90
+
56
91
static void __init setup_vc_stacks (int cpu )
57
92
{
58
93
struct sev_es_runtime_data * data ;
@@ -123,8 +158,52 @@ void noinstr __sev_es_ist_exit(void)
123
158
this_cpu_write (cpu_tss_rw .x86_tss .ist [IST_INDEX_VC ], * (unsigned long * )ist );
124
159
}
125
160
126
- /* Needed in vc_early_forward_exception */
127
- void do_early_exception (struct pt_regs * regs , int trapnr );
161
+ static __always_inline struct ghcb * sev_es_get_ghcb (struct ghcb_state * state )
162
+ {
163
+ struct sev_es_runtime_data * data ;
164
+ struct ghcb * ghcb ;
165
+
166
+ data = this_cpu_read (runtime_data );
167
+ ghcb = & data -> ghcb_page ;
168
+
169
+ if (unlikely (data -> ghcb_active )) {
170
+ /* GHCB is already in use - save its contents */
171
+
172
+ if (unlikely (data -> backup_ghcb_active ))
173
+ return NULL ;
174
+
175
+ /* Mark backup_ghcb active before writing to it */
176
+ data -> backup_ghcb_active = true;
177
+
178
+ state -> ghcb = & data -> backup_ghcb ;
179
+
180
+ /* Backup GHCB content */
181
+ * state -> ghcb = * ghcb ;
182
+ } else {
183
+ state -> ghcb = NULL ;
184
+ data -> ghcb_active = true;
185
+ }
186
+
187
+ return ghcb ;
188
+ }
189
+
190
+ static __always_inline void sev_es_put_ghcb (struct ghcb_state * state )
191
+ {
192
+ struct sev_es_runtime_data * data ;
193
+ struct ghcb * ghcb ;
194
+
195
+ data = this_cpu_read (runtime_data );
196
+ ghcb = & data -> ghcb_page ;
197
+
198
+ if (state -> ghcb ) {
199
+ /* Restore GHCB from Backup */
200
+ * ghcb = * state -> ghcb ;
201
+ data -> backup_ghcb_active = false;
202
+ state -> ghcb = NULL ;
203
+ } else {
204
+ data -> ghcb_active = false;
205
+ }
206
+ }
128
207
129
208
static inline u64 sev_es_rd_ghcb_msr (void )
130
209
{
@@ -316,6 +395,9 @@ static void __init init_ghcb(int cpu)
316
395
panic ("Can't map GHCBs unencrypted" );
317
396
318
397
memset (& data -> ghcb_page , 0 , sizeof (data -> ghcb_page ));
398
+
399
+ data -> ghcb_active = false;
400
+ data -> backup_ghcb_active = false;
319
401
}
320
402
321
403
void __init sev_es_init_vc_handling (void )
@@ -336,6 +418,9 @@ void __init sev_es_init_vc_handling(void)
336
418
init_ghcb (cpu );
337
419
setup_vc_stacks (cpu );
338
420
}
421
+
422
+ /* Secondary CPUs use the runtime #VC handler */
423
+ initial_vc_handler = (unsigned long )safe_stack_exc_vmm_communication ;
339
424
}
340
425
341
426
static void __init vc_early_forward_exception (struct es_em_ctxt * ctxt )
@@ -366,6 +451,159 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
366
451
return result ;
367
452
}
368
453
454
+ static __always_inline void vc_forward_exception (struct es_em_ctxt * ctxt )
455
+ {
456
+ long error_code = ctxt -> fi .error_code ;
457
+ int trapnr = ctxt -> fi .vector ;
458
+
459
+ ctxt -> regs -> orig_ax = ctxt -> fi .error_code ;
460
+
461
+ switch (trapnr ) {
462
+ case X86_TRAP_GP :
463
+ exc_general_protection (ctxt -> regs , error_code );
464
+ break ;
465
+ case X86_TRAP_UD :
466
+ exc_invalid_op (ctxt -> regs );
467
+ break ;
468
+ default :
469
+ pr_emerg ("Unsupported exception in #VC instruction emulation - can't continue\n" );
470
+ BUG ();
471
+ }
472
+ }
473
+
474
+ static __always_inline bool on_vc_fallback_stack (struct pt_regs * regs )
475
+ {
476
+ unsigned long sp = (unsigned long )regs ;
477
+
478
+ return (sp >= __this_cpu_ist_bottom_va (VC2 ) && sp < __this_cpu_ist_top_va (VC2 ));
479
+ }
480
+
481
+ /*
482
+ * Main #VC exception handler. It is called when the entry code was able to
483
+ * switch off the IST to a safe kernel stack.
484
+ *
485
+ * With the current implementation it is always possible to switch to a safe
486
+ * stack because #VC exceptions only happen at known places, like intercepted
487
+ * instructions or accesses to MMIO areas/IO ports. They can also happen with
488
+ * code instrumentation when the hypervisor intercepts #DB, but the critical
489
+ * paths are forbidden to be instrumented, so #DB exceptions currently also
490
+ * only happen in safe places.
491
+ */
492
+ DEFINE_IDTENTRY_VC_SAFE_STACK (exc_vmm_communication )
493
+ {
494
+ struct sev_es_runtime_data * data = this_cpu_read (runtime_data );
495
+ struct ghcb_state state ;
496
+ struct es_em_ctxt ctxt ;
497
+ enum es_result result ;
498
+ struct ghcb * ghcb ;
499
+
500
+ lockdep_assert_irqs_disabled ();
501
+ instrumentation_begin ();
502
+
503
+ /*
504
+ * This is invoked through an interrupt gate, so IRQs are disabled. The
505
+ * code below might walk page-tables for user or kernel addresses, so
506
+ * keep the IRQs disabled to protect us against concurrent TLB flushes.
507
+ */
508
+
509
+ ghcb = sev_es_get_ghcb (& state );
510
+ if (!ghcb ) {
511
+ /*
512
+ * Mark GHCBs inactive so that panic() is able to print the
513
+ * message.
514
+ */
515
+ data -> ghcb_active = false;
516
+ data -> backup_ghcb_active = false;
517
+
518
+ panic ("Unable to handle #VC exception! GHCB and Backup GHCB are already in use" );
519
+ }
520
+
521
+ vc_ghcb_invalidate (ghcb );
522
+ result = vc_init_em_ctxt (& ctxt , regs , error_code );
523
+
524
+ if (result == ES_OK )
525
+ result = vc_handle_exitcode (& ctxt , ghcb , error_code );
526
+
527
+ sev_es_put_ghcb (& state );
528
+
529
+ /* Done - now check the result */
530
+ switch (result ) {
531
+ case ES_OK :
532
+ vc_finish_insn (& ctxt );
533
+ break ;
534
+ case ES_UNSUPPORTED :
535
+ pr_err_ratelimited ("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n" ,
536
+ error_code , regs -> ip );
537
+ goto fail ;
538
+ case ES_VMM_ERROR :
539
+ pr_err_ratelimited ("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n" ,
540
+ error_code , regs -> ip );
541
+ goto fail ;
542
+ case ES_DECODE_FAILED :
543
+ pr_err_ratelimited ("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n" ,
544
+ error_code , regs -> ip );
545
+ goto fail ;
546
+ case ES_EXCEPTION :
547
+ vc_forward_exception (& ctxt );
548
+ break ;
549
+ case ES_RETRY :
550
+ /* Nothing to do */
551
+ break ;
552
+ default :
553
+ pr_emerg ("Unknown result in %s():%d\n" , __func__ , result );
554
+ /*
555
+ * Emulating the instruction which caused the #VC exception
556
+ * failed - can't continue so print debug information
557
+ */
558
+ BUG ();
559
+ }
560
+
561
+ out :
562
+ instrumentation_end ();
563
+
564
+ return ;
565
+
566
+ fail :
567
+ if (user_mode (regs )) {
568
+ /*
569
+ * Do not kill the machine if user-space triggered the
570
+ * exception. Send SIGBUS instead and let user-space deal with
571
+ * it.
572
+ */
573
+ force_sig_fault (SIGBUS , BUS_OBJERR , (void __user * )0 );
574
+ } else {
575
+ pr_emerg ("PANIC: Unhandled #VC exception in kernel space (result=%d)\n" ,
576
+ result );
577
+
578
+ /* Show some debug info */
579
+ show_regs (regs );
580
+
581
+ /* Ask hypervisor to sev_es_terminate */
582
+ sev_es_terminate (GHCB_SEV_ES_REASON_GENERAL_REQUEST );
583
+
584
+ /* If that fails and we get here - just panic */
585
+ panic ("Returned from Terminate-Request to Hypervisor\n" );
586
+ }
587
+
588
+ goto out ;
589
+ }
590
+
591
/*
 * This handler runs on the #VC fall-back (VC2) IST stack, i.e. when a
 * second #VC hit while the safe-stack handler was already active. There
 * is no supported way to recover from this context, so it can only
 * panic. It can itself cause further #VC exceptions.
 */
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
{
	instrumentation_begin();
	panic("Can't handle #VC exception from unsupported context\n");
	instrumentation_end();
}
598
+
599
+ DEFINE_IDTENTRY_VC (exc_vmm_communication )
600
+ {
601
+ if (likely (!on_vc_fallback_stack (regs )))
602
+ safe_stack_exc_vmm_communication (regs , error_code );
603
+ else
604
+ ist_exc_vmm_communication (regs , error_code );
605
+ }
606
+
369
607
bool __init handle_vc_boot_ghcb (struct pt_regs * regs )
370
608
{
371
609
unsigned long exit_code = regs -> orig_ax ;
0 commit comments