
[xen master] x86: avoid calling {svm,vmx}_do_resume()

commit e6ebd394385db52855d1517cea829ffff68b34b8
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Dec 15 13:41:23 2020 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Dec 15 13:41:23 2020 +0100

    x86: avoid calling {svm,vmx}_do_resume()
    
    These functions can re-enter themselves via the call path
    hvm_do_resume() -> handle_hvm_io_completion() -> hvm_wait_for_io() ->
    wait_on_xen_event_channel() -> do_softirq() -> schedule() ->
    sched_context_switch() -> continue_running(). If this recursion
    nests deeply enough, the fixed-size hypervisor stack overflows.
    
    Prevent this by also resetting the stack at the
    ->arch.ctxt_switch->tail() invocations (in both places for consistency)
    and thus jumping to the functions instead of calling them.
    
    This is XSA-348 / CVE-2020-29566.
    
    Reported-by: Julien Grall <jgrall@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/domain.c             | 21 ++++-----------------
 xen/arch/x86/hvm/svm/svm.c        |  3 ++-
 xen/arch/x86/hvm/vmx/vmcs.c       |  3 ++-
 xen/arch/x86/pv/domain.c          |  2 +-
 xen/include/asm-x86/current.h     | 13 ++++++++++---
 xen/include/asm-x86/domain.h      |  2 +-
 xen/include/asm-x86/hvm/vmx/vmx.h |  2 +-
 7 files changed, 21 insertions(+), 25 deletions(-)
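
For readers unfamiliar with the pattern, below is a minimal,
self-contained C sketch -- not Xen code, all names are made up -- of why
the fix jumps rather than calls. A tail handler re-entered through an
ordinary call consumes one stack frame per nesting level and eventually
overflows a fixed-size stack; dispatching through a trampoline loop, the
portable analogue of reset_stack_and_jump_ind()'s "reset %rsp, then
jmp", keeps stack usage constant no matter how often resumption recurs.

    /* Illustrative only.  do_resume() stands in for the
     * ->arch.ctxt_switch->tail() handlers; the real fix resets %rsp and
     * jmps, which plain C cannot express, so a trampoline loop is used
     * here to get the same bounded-stack property. */
    #include <stdio.h>

    typedef void (*tail_fn)(void);

    static tail_fn next_tail;         /* whom to "jump" to next */
    static int pending = 1000000;     /* simulated pending re-entries */

    static void do_resume(void)
    {
        /* The buggy variant would call do_resume() directly here,
         * growing the stack by one frame per re-entry. */
        next_tail = (pending-- > 0) ? do_resume : NULL;
    }

    int main(void)
    {
        /* Constant stack depth: each handler returns to the loop. */
        for ( next_tail = do_resume; next_tail; )
            next_tail();
        puts("1000000 re-entries, no stack growth");
        return 0;
    }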

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 3f8d8be280..4932ed62f1 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -130,7 +130,7 @@ void play_dead(void)
         dead_idle();
 }
 
-static void idle_loop(void)
+static void noreturn idle_loop(void)
 {
     unsigned int cpu = smp_processor_id();
     /*
@@ -182,11 +182,6 @@ void startup_cpu_idle_loop(void)
     reset_stack_and_jump(idle_loop);
 }
 
-static void noreturn continue_idle_domain(struct vcpu *v)
-{
-    reset_stack_and_jump(idle_loop);
-}
-
 void init_hypercall_page(struct domain *d, void *ptr)
 {
     memset(ptr, 0xcc, PAGE_SIZE);
@@ -710,7 +705,7 @@ int arch_domain_create(struct domain *d,
         static const struct arch_csw idle_csw = {
             .from = paravirt_ctxt_switch_from,
             .to   = paravirt_ctxt_switch_to,
-            .tail = continue_idle_domain,
+            .tail = idle_loop,
         };
 
         d->arch.ctxt_switch = &idle_csw;
@@ -2047,20 +2042,12 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     /* Ensure that the vcpu has an up-to-date time base. */
     update_vcpu_system_time(next);
 
-    /*
-     * Schedule tail *should* be a terminal function pointer, but leave a
-     * bug frame around just in case it returns, to save going back into the
-     * context switching code and leaving a far more subtle crash to diagnose.
-     */
-    nextd->arch.ctxt_switch->tail(next);
-    BUG();
+    reset_stack_and_jump_ind(nextd->arch.ctxt_switch->tail);
 }
 
 void continue_running(struct vcpu *same)
 {
-    /* See the comment above. */
-    same->domain->arch.ctxt_switch->tail(same);
-    BUG();
+    reset_stack_and_jump_ind(same->domain->arch.ctxt_switch->tail);
 }
 
 int __sync_local_execstate(void)
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index e41330ca55..d90627d4f7 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -991,8 +991,9 @@ static void svm_ctxt_switch_to(struct vcpu *v)
         wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
 }
 
-static void noreturn svm_do_resume(struct vcpu *v)
+static void noreturn svm_do_resume(void)
 {
+    struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
     bool debug_state = (v->domain->debugger_attached ||
                         v->domain->arch.monitor.software_breakpoint_enabled ||
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index a71b935c10..1466064d0c 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1850,8 +1850,9 @@ void vmx_vmentry_failure(void)
     domain_crash(curr->domain);
 }
 
-void vmx_do_resume(struct vcpu *v)
+void vmx_do_resume(void)
 {
+    struct vcpu *v = current;
     bool_t debug_state;
     unsigned long host_cr4;
 
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 27dc0f8ed7..1a607f856e 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -110,7 +110,7 @@ static int parse_pcid(const char *s)
     return rc;
 }
 
-static void noreturn continue_nonidle_domain(struct vcpu *v)
+static void noreturn continue_nonidle_domain(void)
 {
     check_wakeup_from_wait();
     reset_stack_and_jump(ret_from_intr);
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index b47addb3c8..4d8822f78c 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -155,18 +155,18 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
 # define SHADOW_STACK_WORK ""
 #endif
 
-#define reset_stack_and_jump(fn)                                        \
+#define switch_stack_and_jump(fn, instr, constr)                        \
     ({                                                                  \
         unsigned int tmp;                                               \
         __asm__ __volatile__ (                                          \
             SHADOW_STACK_WORK                                           \
             "mov %[stk], %%rsp;"                                        \
             CHECK_FOR_LIVEPATCH_WORK                                    \
-            "jmp %c[fun];"                                              \
+            instr "[fun]"                                               \
             : [val] "=&r" (tmp),                                        \
               [ssp] "=&r" (tmp)                                         \
             : [stk] "r" (guest_cpu_user_regs()),                        \
-              [fun] "i" (fn),                                           \
+              [fun] constr (fn),                                        \
               [skstk_base] "i"                                          \
               ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8),               \
               [stack_mask] "i" (STACK_SIZE - 1),                        \
@@ -176,6 +176,13 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
         unreachable();                                                  \
     })
 
+#define reset_stack_and_jump(fn)                                        \
+    switch_stack_and_jump(fn, "jmp %c", "i")
+
+/* The constraint may only specify non-call-clobbered registers. */
+#define reset_stack_and_jump_ind(fn)                                    \
+    switch_stack_and_jump(fn, "INDIRECT_JMP %", "b")
+
 /*
  * Which VCPU's state is currently running on each CPU?
  * This is not necesasrily the same as 'current' as a CPU may be
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index df657dc69f..3900d7b48b 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -337,7 +337,7 @@ struct arch_domain
     const struct arch_csw {
         void (*from)(struct vcpu *);
         void (*to)(struct vcpu *);
-        void (*tail)(struct vcpu *);
+        void noreturn (*tail)(void);
     } *ctxt_switch;
 
 #ifdef CONFIG_HVM
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 111ccd7e61..09ea0fa2cd 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -95,7 +95,7 @@ typedef enum {
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
 void vmx_intr_assist(void);
-void noreturn vmx_do_resume(struct vcpu *);
+void noreturn vmx_do_resume(void);
 void vmx_vlapic_msr_changed(struct vcpu *v);
 struct hvm_emulate_ctxt;
 void vmx_realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt);
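
As a footnote on the new reset_stack_and_jump_ind(): the comment added
above ("The constraint may only specify non-call-clobbered registers")
exists because the asm block can itself make a call -- on livepatch
builds, CHECK_FOR_LIVEPATCH_WORK expands to a call to
check_for_livepatch_work() -- after the target has already been loaded
into a register. The "b" constraint pins the function pointer into
%rbx, which the SysV AMD64 ABI makes callee-saved, so it survives that
call. Below is a standalone sketch of the idea (GCC on x86-64 Linux
assumed; landing() and fn are made-up names, and the real macro
additionally routes the branch through INDIRECT_JMP for speculation
hardening):

    #include <stdio.h>
    #include <stdlib.h>

    static void __attribute__((noreturn)) landing(void)
    {
        puts("reached via indirect jmp through %rbx");
        exit(0);
    }

    int main(void)
    {
        void (*fn)(void) = landing;

        /* "b" forces fn into %rbx; an intervening C-ABI call inside
         * the asm (as in Xen's macro) would leave it intact, whereas a
         * call-clobbered register such as %rax offers no such
         * guarantee. */
        asm volatile ( "jmp *%[fun]" :: [fun] "b" (fn) );
        __builtin_unreachable();   /* the jmp never falls through */
    }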
--
generated by git-patchbot for /home/xen/git/xen.git#master