[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Add a compile time option to enable domain 0 running in ring 0.



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID ee8041b0ab86f9315476f718da57af38cbf2eed7
# Parent  6060937db0fe568d34b83d015ebe22b86194faa9
Add a compile time option to enable domain 0 running in ring 0.

In this mode only a single guest kernel is supported.

This mode only works for x86/32 (not x86/64).

Signed-off-by: Ian Campbell <Ian.Campbell@xxxxxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/Makefile     Mon Feb 27 14:52:43 2006
@@ -31,6 +31,10 @@
  else
   OBJS += shadow32.o                   # x86_32: old code
  endif
+endif
+
+ifneq ($(supervisor_mode_kernel),y)
+OBJS := $(subst x86_32/supervisor_mode_kernel.o,,$(OBJS))
 endif
 
 OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/Rules.mk
--- a/xen/arch/x86/Rules.mk     Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/Rules.mk     Mon Feb 27 14:52:43 2006
@@ -6,6 +6,7 @@
 # 'make clean' before rebuilding.
 #
 pae ?= n
+supervisor_mode_kernel ?= n
 
 CFLAGS  += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
 CFLAGS  += -iwithprefix include -Wall -Werror -Wno-pointer-arith -pipe
@@ -32,6 +33,9 @@
 CFLAGS  += -DCONFIG_X86_PAE=1
 endif
 endif
+ifeq ($(supervisor_mode_kernel),y)
+CFLAGS  += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
+endif
 
 ifeq ($(TARGET_SUBARCH),x86_64)
 CFLAGS  += -m64 -mno-red-zone -fpic -fno-reorder-blocks
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/domain.c     Mon Feb 27 14:52:43 2006
@@ -351,17 +351,17 @@
 
     if ( !(c->flags & VGCF_HVM_GUEST) )
     {
-        fixup_guest_selector(c->user_regs.ss);
-        fixup_guest_selector(c->kernel_ss);
-        fixup_guest_selector(c->user_regs.cs);
+        fixup_guest_stack_selector(c->user_regs.ss);
+        fixup_guest_stack_selector(c->kernel_ss);
+        fixup_guest_code_selector(c->user_regs.cs);
 
 #ifdef __i386__
-        fixup_guest_selector(c->event_callback_cs);
-        fixup_guest_selector(c->failsafe_callback_cs);
+        fixup_guest_code_selector(c->event_callback_cs);
+        fixup_guest_code_selector(c->failsafe_callback_cs);
 #endif
 
         for ( i = 0; i < 256; i++ )
-            fixup_guest_selector(c->trap_ctxt[i].cs);
+            fixup_guest_code_selector(c->trap_ctxt[i].cs);
     }
     else if ( !hvm_enabled )
       return -EINVAL;
@@ -847,7 +847,11 @@
         regs       = guest_cpu_user_regs();
 #if defined(__i386__)
         regs->eax  = op;
-        regs->eip -= 2;  /* re-execute 'int 0x82' */
+
+        if ( supervisor_mode_kernel )
+            regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+        else
+            regs->eip -= 2;   /* re-execute 'int 0x82' */
 
         for ( i = 0; i < nr_args; i++ )
         {
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/domain_build.c       Mon Feb 27 14:52:43 2006
@@ -793,6 +793,17 @@
         update_pagetables(v);
     }
 
+    if ( supervisor_mode_kernel )
+    {
+        v->arch.guest_context.kernel_ss &= ~3;
+        v->arch.guest_context.user_regs.ss &= ~3;
+        v->arch.guest_context.user_regs.es &= ~3;
+        v->arch.guest_context.user_regs.ds &= ~3;
+        v->arch.guest_context.user_regs.fs &= ~3;
+        v->arch.guest_context.user_regs.gs &= ~3;
+        printk("Dom0 runs in ring 0 (supervisor mode)\n");
+    }
+
     rc = 0;
 
     /* DOM0 is permitted full I/O capabilities. */
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/traps.c      Mon Feb 27 14:52:43 2006
@@ -1429,7 +1429,7 @@
         if ( cur.address == 0 )
             break;
 
-        fixup_guest_selector(cur.cs);
+        fixup_guest_code_selector(cur.cs);
 
         memcpy(&dst[cur.vector], &cur, sizeof(cur));
 
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/x86_32/asm-offsets.c Mon Feb 27 14:52:43 2006
@@ -72,6 +72,13 @@
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     BLANK();
 
+    OFFSET(TSS_ss0, struct tss_struct, ss0);
+    OFFSET(TSS_esp0, struct tss_struct, esp0);
+    OFFSET(TSS_ss1, struct tss_struct, ss1);
+    OFFSET(TSS_esp1, struct tss_struct, esp1);
+    DEFINE(TSS_sizeof, sizeof(struct tss_struct));
+    BLANK();
+
     OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
     OFFSET(VCPU_svm_hsa_pa,  struct vcpu, arch.hvm_svm.host_save_pa);
     OFFSET(VCPU_svm_vmcb, struct vcpu, arch.hvm_svm.vmcb);
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/x86_32/entry.S       Mon Feb 27 14:52:43 2006
@@ -77,6 +77,13 @@
 restore_all_guest:
         testl $X86_EFLAGS_VM,UREGS_eflags(%esp)
         jnz  restore_all_vm86
+#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+        testl $2,UREGS_cs(%esp)
+        jnz   1f
+        call  restore_ring0_guest
+        jmp   restore_all_vm86
+1:
+#endif
 FLT1:   mov  UREGS_ds(%esp),%ds
 FLT2:   mov  UREGS_es(%esp),%es
 FLT3:   mov  UREGS_fs(%esp),%fs
@@ -157,6 +164,7 @@
         ALIGN
 ENTRY(hypercall)
         subl $4,%esp
+        FIXUP_RING0_GUEST_STACK
        SAVE_ALL(b)
         sti
         GET_CURRENT(%ebx)
@@ -294,6 +302,11 @@
         popl %eax
         shll $16,%eax                    # Bits 16-23: saved_upcall_mask
         movw UREGS_cs+4(%esp),%ax        # Bits  0-15: CS
+#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+        testw $2,%ax
+        jnz  FLT15
+        and  $~3,%ax                     # RPL 1 -> RPL 0
+#endif
 FLT15:  movl %eax,%gs:4(%esi) 
         test $0x00FF0000,%eax            # Bits 16-23: saved_upcall_mask
         setz %ch                         # %ch == !saved_upcall_mask
@@ -388,6 +401,7 @@
        pushl $TRAP_divide_error<<16
        ALIGN
 error_code:
+        FIXUP_RING0_GUEST_STACK
         SAVE_ALL_NOSEGREGS(a)
         SET_XEN_SEGMENTS(a)
         testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
@@ -505,6 +519,10 @@
        jmp error_code
 
 ENTRY(nmi)
+#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+        # NMI entry protocol is incompatible with guest kernel in ring 0.
+        iret
+#else
         # Save state but do not trash the segment registers!
         # We may otherwise be unable to reload them or copy them to ring 1. 
        pushl %eax
@@ -546,6 +564,7 @@
         movl  $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_LOGICAL | \
                 TRAP_deferred_nmi),%ss:APIC_ICR(%eax)
         jmp   restore_all_xen
+#endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
 
 ENTRY(setup_vm86_frame)
         # Copies the entire stack frame forwards by 16 bytes.
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/x86_32/mm.c  Mon Feb 27 14:52:43 2006
@@ -180,6 +180,15 @@
             page_set_owner(page, dom_xen);
         }
     }
+
+    if ( supervisor_mode_kernel )
+    {
+        /* Guest kernel runs in ring 0, not ring 1. */
+        struct desc_struct *d;
+        d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
+        d[0].b &= ~_SEGMENT_DPL;
+        d[1].b &= ~_SEGMENT_DPL;
+    }
 }
 
 long subarch_memory_op(int op, void *arg)
@@ -223,7 +232,7 @@
     int nr = smp_processor_id();
     struct tss_struct *t = &init_tss[nr];
 
-    fixup_guest_selector(ss);
+    fixup_guest_stack_selector(ss);
 
     current->arch.guest_context.kernel_ss = ss;
     current->arch.guest_context.kernel_sp = esp;
@@ -239,6 +248,10 @@
     unsigned long base, limit;
     u32 a = d->a, b = d->b;
     u16 cs;
+
+    /* Let a ring0 guest kernel set any descriptor it wants to. */
+    if ( supervisor_mode_kernel )
+        return 1;
 
     /* A not-present descriptor will always fault, so is safe. */
     if ( !(b & _SEGMENT_P) ) 
@@ -273,7 +286,7 @@
 
         /* Validate and fix up the target code selector. */
         cs = a >> 16;
-        fixup_guest_selector(cs);
+        fixup_guest_code_selector(cs);
         if ( !guest_gate_selector_okay(cs) )
             goto bad;
         a = d->a = (d->a & 0xffffU) | (cs << 16);
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/x86_32/traps.c       Mon Feb 27 14:52:43 2006
@@ -256,8 +256,14 @@
      * We can't virtualise interrupt gates, as there's no way to get
      * the CPU to automatically clear the events_mask variable. Also we
      * must ensure that the CS is safe to poke into an interrupt gate.
-     */
-    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) )
+     *
+     * When running with supervisor_mode_kernel enabled a direct trap
+     * to the guest OS cannot be used because the INT instruction will
+     * switch to the Xen stack and we need to swap back to the guest
+     * kernel stack before passing control to the system call entry point.
+     */
+    if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ||
+         supervisor_mode_kernel )
     {
         v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
         return;
@@ -278,8 +284,8 @@
 {
     struct vcpu *d = current;
 
-    fixup_guest_selector(event_selector);
-    fixup_guest_selector(failsafe_selector);
+    fixup_guest_code_selector(event_selector);
+    fixup_guest_code_selector(failsafe_selector);
 
     d->arch.guest_context.event_callback_cs     = event_selector;
     d->arch.guest_context.event_callback_eip    = event_address;
@@ -289,12 +295,51 @@
     return 0;
 }
 
-void hypercall_page_initialise(void *hypercall_page)
-{
+static void hypercall_page_initialise_ring0_kernel(void *hypercall_page)
+{
+    extern asmlinkage int hypercall(void);
     char *p;
     int i;
 
     /* Fill in all the transfer points with template machine code. */
+
+    for ( i = 0; i < NR_hypercalls; i++ )
+    {
+        p = (char *)(hypercall_page + (i * 32));
+
+        *(u8  *)(p+ 0) = 0x9c;      /* pushf */
+        *(u8  *)(p+ 1) = 0xfa;      /* cli */
+        *(u8  *)(p+ 2) = 0xb8;      /* mov $<i>,%eax */
+        *(u32 *)(p+ 3) = i;
+        *(u8  *)(p+ 7) = 0x9a;      /* lcall $__HYPERVISOR_CS,&hypercall */
+        *(u32 *)(p+ 8) = (u32)&hypercall;
+        *(u16 *)(p+12) = (u16)__HYPERVISOR_CS;
+        *(u8  *)(p+14) = 0xc3;      /* ret */
+    }
+
+    /*
+     * HYPERVISOR_iret is special because it doesn't return and expects a
+     * special stack frame. Guests jump at this transfer point instead of
+     * calling it.
+     */
+    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
+    *(u8  *)(p+ 0) = 0x50;      /* push %eax */
+    *(u8  *)(p+ 1) = 0x9c;      /* pushf */
+    *(u8  *)(p+ 2) = 0xfa;      /* cli */
+    *(u8  *)(p+ 3) = 0xb8;      /* mov $<i>,%eax */
+    *(u32 *)(p+ 4) = __HYPERVISOR_iret;
+    *(u8  *)(p+ 8) = 0x9a;      /* lcall $__HYPERVISOR_CS,&hypercall */
+    *(u32 *)(p+ 9) = (u32)&hypercall;
+    *(u16 *)(p+13) = (u16)__HYPERVISOR_CS;
+}
+
+static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
+{
+    char *p;
+    int i;
+
+    /* Fill in all the transfer points with template machine code. */
+
     for ( i = 0; i < (PAGE_SIZE / 32); i++ )
     {
         p = (char *)(hypercall_page + (i * 32));
@@ -314,6 +359,14 @@
     *(u8  *)(p+ 1) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
     *(u32 *)(p+ 2) = __HYPERVISOR_iret;
     *(u16 *)(p+ 6) = 0x82cd;  /* int  $0x82 */
+}
+
+void hypercall_page_initialise(void *hypercall_page)
+{
+    if ( supervisor_mode_kernel )
+        hypercall_page_initialise_ring0_kernel(hypercall_page);
+    else
+        hypercall_page_initialise_ring1_kernel(hypercall_page);
 }
 
 /*
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/x86_64/mm.c  Mon Feb 27 14:52:43 2006
@@ -228,7 +228,7 @@
 
 long do_stack_switch(unsigned long ss, unsigned long esp)
 {
-    fixup_guest_selector(ss);
+    fixup_guest_stack_selector(ss);
     current->arch.guest_context.kernel_ss = ss;
     current->arch.guest_context.kernel_sp = esp;
     return 0;
@@ -315,7 +315,7 @@
 
     /* Validate and fix up the target code selector. */
     cs = a >> 16;
-    fixup_guest_selector(cs);
+    fixup_guest_code_selector(cs);
     if ( !guest_gate_selector_okay(cs) )
         goto bad;
     a = d->a = (d->a & 0xffffU) | (cs << 16);
diff -r 6060937db0fe -r ee8041b0ab86 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Mon Feb 27 11:02:00 2006
+++ b/xen/common/dom0_ops.c     Mon Feb 27 14:52:43 2006
@@ -170,6 +170,13 @@
         cpumask_t      cpu_exclude_map;
         static domid_t rover = 0;
 
+        /*
+         * Running the domain 0 kernel in ring 0 is not compatible
+         * with multiple guests.
+         */
+        if ( supervisor_mode_kernel )
+            return -EINVAL;
+
         dom = op->u.createdomain.domain;
         if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
         {
diff -r 6060937db0fe -r ee8041b0ab86 xen/common/kernel.c
--- a/xen/common/kernel.c       Mon Feb 27 11:02:00 2006
+++ b/xen/common/kernel.c       Mon Feb 27 14:52:43 2006
@@ -195,6 +195,8 @@
                     (1U << XENFEAT_writable_page_tables) |
                     (1U << XENFEAT_auto_translated_physmap) |
                     (1U << XENFEAT_pae_pgdir_above_4gb);
+            if ( supervisor_mode_kernel )
+                fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
             break;
         default:
             return -EINVAL;
diff -r 6060937db0fe -r ee8041b0ab86 xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h     Mon Feb 27 11:02:00 2006
+++ b/xen/include/asm-ia64/config.h     Mon Feb 27 14:52:43 2006
@@ -39,6 +39,8 @@
 //#define CONFIG_NR_CPUS 16
 //leave SMP for a later time
 //#undef CONFIG_SMP
+
+#define supervisor_mode_kernel (0)
 
 #define MAX_DMADOM_PFN (0x7FFFFFFFUL >> PAGE_SHIFT) /* 31 addressable bits */
 
diff -r 6060937db0fe -r ee8041b0ab86 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Mon Feb 27 11:02:00 2006
+++ b/xen/include/asm-x86/config.h      Mon Feb 27 14:52:43 2006
@@ -36,6 +36,12 @@
 #define OPT_CONSOLE_STR "com1,vga"
 
 #define NR_CPUS 32
+
+#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+# define supervisor_mode_kernel (1)
+#else
+# define supervisor_mode_kernel (0)
+#endif
 
 /* Linkage for x86 */
 #define __ALIGN .align 16,0x90
diff -r 6060937db0fe -r ee8041b0ab86 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h        Mon Feb 27 11:02:00 2006
+++ b/xen/include/asm-x86/desc.h        Mon Feb 27 14:52:43 2006
@@ -27,9 +27,22 @@
 #endif
 
 /* Fix up the RPL of a guest segment selector. */
-#define fixup_guest_selector(sel)                               \
+#define __fixup_guest_selector(sel)                             \
     ((sel) = (((sel) & 3) >= GUEST_KERNEL_RPL) ? (sel) :        \
      (((sel) & ~3) | GUEST_KERNEL_RPL))
+
+/* Stack selectors don't need fixing up if the kernel runs in ring 0. */
+#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+#define fixup_guest_stack_selector(ss) ((void)0)
+#else
+#define fixup_guest_stack_selector(ss) __fixup_guest_selector(ss)
+#endif
+
+/*
+ * Code selectors are always fixed up. It allows the Xen exit stub to detect
+ * return to guest context, even when the guest kernel runs in ring 0.
+ */
+#define fixup_guest_code_selector(cs)  __fixup_guest_selector(cs)
 
 /*
  * We need this function because enforcing the correct guest kernel RPL is
diff -r 6060937db0fe -r ee8041b0ab86 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h    Mon Feb 27 11:02:00 2006
+++ b/xen/include/asm-x86/x86_32/asm_defns.h    Mon Feb 27 14:52:43 2006
@@ -48,9 +48,24 @@
 
 #ifdef PERF_COUNTERS
 #define PERFC_INCR(_name,_idx)                          \
-    lock incl perfcounters+_name(,_idx,4)
+        lock incl perfcounters+_name(,_idx,4)
 #else
 #define PERFC_INCR(_name,_idx)
+#endif
+
+#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+#define FIXUP_RING0_GUEST_STACK                         \
+        testl $2,8(%esp);                               \
+        jnz 1f; /* rings 2 & 3 permitted */             \
+        testl $1,8(%esp);                               \
+        jz 2f;                                          \
+        ud2; /* ring 1 should not be used */            \
+        2:cmpl $(__HYPERVISOR_VIRT_START),%esp;         \
+        jge 1f;                                         \
+        call fixup_ring0_guest_stack;                   \
+        1:
+#else
+#define FIXUP_RING0_GUEST_STACK
 #endif
 
 #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
@@ -61,6 +76,7 @@
     ".globl " STR(x) "\n\t"                     \
     STR(x) ":\n\t"                              \
     "pushl $"#v"<<16\n\t"                       \
+    STR(FIXUP_RING0_GUEST_STACK)                \
     STR(SAVE_ALL(a))                            \
     "movl %esp,%eax\n\t"                        \
     "pushl %eax\n\t"                            \
@@ -72,6 +88,7 @@
 __asm__(                                        \
     "\n" __ALIGN_STR"\n"                        \
     "common_interrupt:\n\t"                     \
+    STR(FIXUP_RING0_GUEST_STACK)                \
     STR(SAVE_ALL(a))                            \
     "movl %esp,%eax\n\t"                        \
     "pushl %eax\n\t"                            \
diff -r 6060937db0fe -r ee8041b0ab86 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- /dev/null   Mon Feb 27 11:02:00 2006
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S      Mon Feb 27 14:52:43 2006
@@ -0,0 +1,145 @@
+/*
+ * Handle stack fixup for guest running in RING 0.
+ *
+ * Copyright (c) 2006 Ian Campbell
+ *
+ * When a guest kernel is allowed to run in RING 0 a hypercall,
+ * interrupt or exception interrupting the guest kernel will not cause
+ * a privilege level change and therefore the stack will not be swapped
+ * to the Xen stack.
+ *
+ * To fix this we look for RING 0 activation frames with a stack
+ * pointer below HYPERVISOR_VIRT_START (indicating a guest kernel
+ * frame) and fix this up by locating the Xen stack via the TSS
+ * and moving the activation frame to the Xen stack. In the process we
+ * convert the frame into an inter-privilege frame returning to RING 1
+ * so that we can catch and reverse the process on exit.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+#include <public/xen.h>
+
+        # Upon entry the stack should be the Xen stack and contain:
+        #   %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN
+        # On exit the stack should be %ss:%esp (i.e. the guest stack)
+        # and contain:
+        #   EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN
+        ALIGN
+ENTRY(restore_ring0_guest)
+        # Point %gs:%esi to guest stack.
+RRG0:   movw UREGS_ss+4(%esp),%gs
+        movl UREGS_esp+4(%esp),%esi
+
+        # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack.
+        movl $(UREGS_kernel_sizeof>>2)+1,%ecx
+
+1:      subl $4,%esi
+        movl -4(%esp,%ecx,4),%eax
+RRG1:   movl %eax,%gs:(%esi)
+        loop 1b
+
+RRG2:   andl $~3,%gs:UREGS_cs+4(%esi)
+
+        movl %gs,%eax
+
+        # We need to do this because these registers are not present
+        # on the guest stack so they cannot be restored by the code in
+        # restore_all_guest.
+RRG3:   mov  UREGS_ds+4(%esp),%ds
+RRG4:   mov  UREGS_es+4(%esp),%es
+RRG5:   mov  UREGS_fs+4(%esp),%fs
+RRG6:   mov  UREGS_gs+4(%esp),%gs
+
+RRG7:   movl %eax,%ss
+        movl %esi,%esp
+
+        ret
+.section __ex_table,"a"
+        .long RRG0,domain_crash_synchronous
+        .long RRG1,domain_crash_synchronous
+        .long RRG2,domain_crash_synchronous
+        .long RRG3,domain_crash_synchronous
+        .long RRG4,domain_crash_synchronous
+        .long RRG5,domain_crash_synchronous
+        .long RRG6,domain_crash_synchronous
+        .long RRG7,domain_crash_synchronous
+.previous
+
+        # Upon entry the stack should be a guest stack and contain:
+        #   EFLAGS, %cs, %eip, ERROR, RETURN
+        # On exit the stack should be the Xen stack and contain:
+        #   %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, RETURN
+        ALIGN
+ENTRY(fixup_ring0_guest_stack)
+        pushl %eax
+        pushl %ecx
+        pushl %ds
+        pushl %gs
+        pushl %esi
+
+        movw  $__HYPERVISOR_DS,%ax
+        movw  %ax,%ds
+
+        # Point %gs:%esi to guest stack frame.
+        movw  %ss,%ax
+        movw  %ax,%gs
+        movl  %esp,%esi
+        # Account for entries on the guest stack:
+        # * Pushed by normal exception/interrupt/hypercall mechanisms
+        #   * EFLAGS, %cs, %eip, ERROR == 4 words.
+        # * Pushed by the fixup routine
+        #   * [RETURN], %eax, %ecx, %ds, %gs and %esi == 6 words.
+        addl $((6+4)*4),%esi
+
+        # %gs:%esi now points to the guest stack before the
+        # interrupt/exception occurred.
+
+        /*
+         * Reverse the __TSS macro, giving us the CPU number.
+         * The TSS for this cpu is at init_tss + ( cpu * 128 ).
+         */
+        str   %ecx
+        shrl  $3,%ecx                                   # Calculate GDT index for TSS.
+        subl  $(FIRST_RESERVED_GDT_ENTRY+8),%ecx        # %ecx = 2*cpu.
+        shll  $6,%ecx                                   # Each TSS entry is 0x80 bytes
+        addl  $init_tss,%ecx                            # but we have 2*cpu from above.
+
+        # Load Xen stack from TSS.
+        movw  TSS_ss0(%ecx),%ax
+TRP1:   movw  %ax,%ss
+        movl  TSS_esp0(%ecx),%esp
+
+        pushl %gs
+        pushl %esi
+
+        # Move EFLAGS, %cs, %eip, ERROR, RETURN, %eax, %ecx, %ds, %gs, %esi
+        # from guest stack to Xen stack.
+        movl  $10,%ecx
+1:      subl  $4,%esp
+        subl  $4,%esi
+TRP2:   movl  %gs:(%esi),%eax
+        movl  %eax,(%esp)
+        loop  1b
+
+        # CS = CS|1 to simulate RING1 stack frame.
+        orl   $1,32(%esp)
+
+        popl  %esi
+        popl  %gs
+        popl  %ds
+        popl  %ecx
+        popl  %eax
+        ret
+.section __ex_table,"a"
+        .long TRP1,domain_crash_synchronous
+        .long TRP2,domain_crash_synchronous
+.previous
+
+domain_crash_synchronous_string:
+        .asciz "domain_crash_sync called from supervisor_mode_kernel.S (%lx)\n"
+
+domain_crash_synchronous:
+        pushl $domain_crash_synchronous_string
+        call  printf
+        jmp   __domain_crash_synchronous

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.