[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [HVM][VMX] Use CPUID instruction virtualization to workaround VMXAssist 4G limit.



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID a855c7d3a5365e002736435343e5e19f008011e6
# Parent  d27d1f8ca25c8977fbb13d84fb31b9442dbd2030
[HVM][VMX] Use CPUID instruction virtualization to work around VMXAssist 4G 
limit.

The address space access limit in VMXAssist is 4G, because IA-32 only has
a 4GB virtual address space which VMXAssist can use to map physical
memory. The issue is that win2k3 server with more than 4G of memory will
put the AP GDT above 4G, so when an AP changes its mode from real mode to
PAE paging mode, the long jump instruction it uses needs to access AP GDT
entries which reside above 4G. Because of this constraint, VMXAssist
cannot access the GDT and so the guest fails to boot.

Signed-off-by: Xin Li <xin.b.li@xxxxxxxxx>
---
 tools/firmware/vmxassist/util.c |   25 ++++++++++++++++++++
 tools/firmware/vmxassist/util.h |    1 
 tools/firmware/vmxassist/vm86.c |   48 ++++++++++++++++++++++++++++------------
 xen/arch/x86/hvm/vmx/vmx.c      |   27 +++++++++++++++++++++-
 4 files changed, 86 insertions(+), 15 deletions(-)

diff -r d27d1f8ca25c -r a855c7d3a536 tools/firmware/vmxassist/util.c
--- a/tools/firmware/vmxassist/util.c   Thu Oct 19 15:15:36 2006 +0100
+++ b/tools/firmware/vmxassist/util.c   Thu Oct 19 15:49:16 2006 +0100
@@ -29,6 +29,31 @@ static char *printnum(char *, unsigned l
 static char *printnum(char *, unsigned long, int);
 static void _doprint(void (*)(int), char const *, va_list);
 
+void
+cpuid_addr_value(uint64_t addr, uint64_t *value)
+{
+       uint32_t addr_low   = (uint32_t)addr;
+       uint32_t addr_high  = (uint32_t)(addr >> 32);
+       uint32_t value_low, value_high;
+       static unsigned int addr_leaf;
+
+       if (!addr_leaf) {
+               unsigned int eax, ebx, ecx, edx;
+               __asm__ __volatile__(
+                       "cpuid"
+                       : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+                       : "0" (0x40000000));
+               addr_leaf = eax + 1;
+       }
+
+       __asm__ __volatile__(
+               "cpuid"
+               : "=c" (value_low), "=d" (value_high)
+               : "a" (addr_leaf), "0" (addr_low), "1" (addr_high)
+               : "ebx");
+
+       *value = (uint64_t)value_high << 32 | value_low;
+}
 
 void
 dump_regs(struct regs *regs)
diff -r d27d1f8ca25c -r a855c7d3a536 tools/firmware/vmxassist/util.h
--- a/tools/firmware/vmxassist/util.h   Thu Oct 19 15:15:36 2006 +0100
+++ b/tools/firmware/vmxassist/util.h   Thu Oct 19 15:49:16 2006 +0100
@@ -31,6 +31,7 @@
 
 struct vmx_assist_context;
 
+extern void cpuid_addr_value(uint64_t addr, uint64_t *value);
 extern void hexdump(unsigned char *, int);
 extern void dump_regs(struct regs *);
 extern void dump_vmx_context(struct vmx_assist_context *);
diff -r d27d1f8ca25c -r a855c7d3a536 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c   Thu Oct 19 15:15:36 2006 +0100
+++ b/tools/firmware/vmxassist/vm86.c   Thu Oct 19 15:49:16 2006 +0100
@@ -56,8 +56,8 @@ static char *rnames[] = { "ax", "cx", "d
 #define PT_ENTRY_PRESENT 0x1
 
 /* We only support access to <=4G physical memory due to 1:1 mapping */
-static unsigned
-guest_linear_to_real(uint32_t base)
+static uint64_t
+guest_linear_to_phys(uint32_t base)
 {
        uint32_t gcr3 = oldctx.cr3;
        uint64_t l2_mfn;
@@ -89,23 +89,32 @@ guest_linear_to_real(uint32_t base)
                l2_mfn = ((uint64_t *)(long)gcr3)[(base >> 30) & 0x3];
                if (!(l2_mfn & PT_ENTRY_PRESENT))
                        panic("l3 entry not present\n");
-               l2_mfn &= 0x3fffff000ULL;
-
-               l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff];
+               l2_mfn &= 0xffffff000ULL;
+
+               if (l2_mfn & 0xf00000000ULL) {
+                       printf("l2 page above 4G\n");
+                       cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), 
&l1_mfn);
+               } else
+                       l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 
0x1ff];
                if (!(l1_mfn & PT_ENTRY_PRESENT))
                        panic("l2 entry not present\n");
 
                if (l1_mfn & PDE_PS) { /* CR4.PSE is ignored in PAE mode */
-                       l0_mfn = l1_mfn & 0x3ffe00000ULL;
+                       l0_mfn = l1_mfn & 0xfffe00000ULL;
                        return l0_mfn + (base & 0x1fffff);
                }
 
-               l1_mfn &= 0x3fffff000ULL;
-
-               l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff];
+               l1_mfn &= 0xffffff000ULL;
+
+               if (l1_mfn & 0xf00000000ULL) {
+                       printf("l1 page above 4G\n");
+                       cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), 
&l0_mfn);
+               } else
+                       l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 
0x1ff];
                if (!(l0_mfn & PT_ENTRY_PRESENT))
                        panic("l1 entry not present\n");
-               l0_mfn &= 0x3fffff000ULL;
+
+               l0_mfn &= 0xffffff000ULL;
 
                return l0_mfn + (base & 0xfff);
        }
@@ -114,6 +123,7 @@ static unsigned
 static unsigned
 address(struct regs *regs, unsigned seg, unsigned off)
 {
+       uint64_t gdt_phys_base;
        unsigned long long entry;
        unsigned seg_base, seg_limit;
        unsigned entry_low, entry_high;
@@ -129,8 +139,13 @@ address(struct regs *regs, unsigned seg,
            (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg))
                return ((seg & 0xFFFF) << 4) + off;
 
-       entry = ((unsigned long long *)
-                 guest_linear_to_real(oldctx.gdtr_base))[seg >> 3];
+       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
+       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+               printf("gdt base address above 4G\n");
+               cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3), &entry);
+       } else
+               entry = ((unsigned long long *)(long)gdt_phys_base)[seg >> 3];
+
        entry_high = entry >> 32;
        entry_low = entry & 0xFFFFFFFF;
 
@@ -804,6 +819,7 @@ static int
 static int
 load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union 
vmcs_arbytes *arbytes)
 {
+       uint64_t gdt_phys_base;
        unsigned long long entry;
 
        /* protected mode: use seg as index into gdt */
@@ -815,8 +831,12 @@ load_seg(unsigned long sel, uint32_t *ba
                return 1;
        }
 
-       entry = ((unsigned long long *)
-                 guest_linear_to_real(oldctx.gdtr_base))[sel >> 3];
+       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
+       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+               printf("gdt base address above 4G\n");
+               cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3), &entry);
+       } else
+               entry = ((unsigned long long *)(long)gdt_phys_base)[sel >> 3];
 
        /* Check the P bit first */
        if (!((entry >> (15+32)) & 0x1) && sel != 0)
diff -r d27d1f8ca25c -r a855c7d3a536 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Oct 19 15:15:36 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Oct 19 15:49:16 2006 +0100
@@ -921,7 +921,32 @@ static void vmx_do_cpuid(struct cpu_user
     if ( input == CPUID_LEAF_0x4 )
     {
         cpuid_count(input, count, &eax, &ebx, &ecx, &edx);
-        eax &= NUM_CORES_RESET_MASK;  
+        eax &= NUM_CORES_RESET_MASK;
+    }
+    else if ( input == 0x40000003 )
+    {
+        /*
+         * NB. Unsupported interface for private use of VMXASSIST only.
+         * Note that this leaf lives at <max-hypervisor-leaf> + 1.
+         */
+        u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
+        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+        char *p;
+
+        DPRINTK("Input address is 0x%"PRIx64".\n", value);
+
+        /* 8-byte aligned valid pseudophys address from vmxassist, please. */
+        if ( (value & 7) || (mfn == INVALID_MFN) ||
+             !v->arch.hvm_vmx.vmxassist_enabled )
+            domain_crash_synchronous();
+
+        p = map_domain_page(mfn);
+        value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
+        unmap_domain_page(p);
+
+        DPRINTK("Output value is 0x%"PRIx64".\n", value);
+        ecx = (u32)(value >>  0);
+        edx = (u32)(value >> 32);
     }
     else if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) )
     {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.