
[Xen-changelog] Enable x86_32 PAE unmodified guests on 64-bit Xen when the hvm feature



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID eaeb26494a3947b79762df648778562234c277ba
# Parent  39f624f2b2415a3ceb7b8b09c1055b40713ef87e
Enable x86_32 PAE unmodified guests on 64-bit Xen when the hvm feature
is present. We have tested only Linux at this point, and we will improve
the functionality as we test other guests.
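
For reference, the shadow code in this patch tells apart the several PAE
top-level tables that can share one page by an index taken from bits 5-11
of the guest CR3 value; see get_cr3_idxval() and PAE_CR3_ALIGN /
PAE_CR3_IDX_MASK in xen/include/asm-x86/shadow_64.h below. A minimal
standalone sketch of that split follows; only the two macros and the
shift/mask expression come from the patch, while the CR3 value and main()
are illustrative scaffolding.

#include <stdio.h>

#define PAE_CR3_ALIGN     5     /* PAE CR3 is 32-byte aligned              */
#define PAE_CR3_IDX_MASK  0x7f  /* up to 128 top-level tables per 4K page  */
#define PAGE_SHIFT        12

/* Mirrors get_cr3_idxval(): which 32-byte top-level table CR3 selects. */
static unsigned long cr3_idxval(unsigned long cr3)
{
    return (cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
}

int main(void)
{
    unsigned long cr3 = 0x1234020UL;   /* illustrative guest CR3 value */
    printf("gpfn = %lx, idx = %lu\n", cr3 >> PAGE_SHIFT, cr3_idxval(cr3));
    return 0;
}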

SVM needs the equivalent changes to its vmc.c to get this
functionality working, but this patch does not break the build.
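
For SVM, the equivalent hook is hvm_funcs.get_guest_ctrl_reg: the generic
HVM layer reaches it through hvm_get_guest_ctrl_reg(), and this patch wires
it up to vmx_get_ctrl_reg() on the VMX side. A condensed, self-contained
sketch of that indirection follows; the struct vcpu fields and main() here
are simplified stand-ins, not the real Xen definitions.

#include <stdio.h>

struct vcpu { unsigned long cr0, cr2, cr3; };

struct hvm_function_table {
    unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
};

static struct hvm_function_table hvm_funcs;

/* VMX-style backend, mirroring vmx_get_ctrl_reg() below. */
static unsigned long vmx_like_get_ctrl_reg(struct vcpu *v, unsigned int num)
{
    switch (num) {
    case 0: return v->cr0;
    case 2: return v->cr2;
    case 3: return v->cr3;
    default: return 0;
    }
}

/* Generic wrapper, mirroring hvm_get_guest_ctrl_reg(): fails safe (0)
 * when no vendor backend has registered the hook. */
static unsigned long hvm_get_guest_ctrl_reg(struct vcpu *v, unsigned int num)
{
    if (hvm_funcs.get_guest_ctrl_reg)
        return hvm_funcs.get_guest_ctrl_reg(v, num);
    return 0;
}

int main(void)
{
    struct vcpu demo = { .cr3 = 0x1234020UL };
    hvm_funcs.get_guest_ctrl_reg = vmx_like_get_ctrl_reg;  /* as VMX does */
    printf("guest CR3 = %lx\n", hvm_get_guest_ctrl_reg(&demo, 3));
    return 0;
}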

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>

diff -r 39f624f2b241 -r eaeb26494a39 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Sat Feb 18 10:41:42 2006
+++ b/xen/arch/x86/Makefile     Sat Feb 18 10:56:13 2006
@@ -23,7 +23,7 @@
 
 OBJS := $(patsubst shadow%.o,,$(OBJS)) # drop all
 ifeq ($(TARGET_SUBARCH),x86_64) 
- OBJS += shadow.o shadow_public.o shadow_guest32.o     # x86_64: new code
+ OBJS += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o # x86_64: new code
 endif
 ifeq ($(TARGET_SUBARCH),x86_32) 
  ifneq ($(pae),n)
diff -r 39f624f2b241 -r eaeb26494a39 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Sat Feb 18 10:41:42 2006
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Sat Feb 18 10:56:13 2006
@@ -244,7 +244,6 @@
                        host_state->msr_items[VMX_INDEX_MSR_EFER]);
                 set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
                 set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
-                wrmsrl(MSR_EFER, msr_content);
             }
         }
         break;
@@ -433,6 +432,22 @@
     return inst_len;
 }
 
+unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num)
+{
+    switch ( num )
+    {
+    case 0:
+        return v->arch.hvm_vmx.cpu_cr0;
+    case 2:
+        return v->arch.hvm_vmx.cpu_cr2;
+    case 3:
+        return v->arch.hvm_vmx.cpu_cr3;
+    default:
+        BUG();
+    }
+    return 0;                   /* dummy */
+}
+
 extern long evtchn_send(int lport);
 void do_nmi(struct cpu_user_regs *);
 
@@ -529,6 +544,7 @@
     hvm_funcs.realmode = vmx_realmode;
     hvm_funcs.paging_enabled = vmx_paging_enabled;
     hvm_funcs.instruction_length = vmx_instruction_length;
+    hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg;
 
     hvm_enabled = 1;
 
@@ -652,14 +668,17 @@
                 !vlapic_global_enabled((VLAPIC(v))) )
             clear_bit(X86_FEATURE_APIC, &edx);
 
-#if CONFIG_PAGING_LEVELS >= 3
+#if CONFIG_PAGING_LEVELS < 3
+        clear_bit(X86_FEATURE_PSE, &edx);
+        clear_bit(X86_FEATURE_PAE, &edx);
+        clear_bit(X86_FEATURE_PSE36, &edx);
+#else
         if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
-#endif
         {
             clear_bit(X86_FEATURE_PSE, &edx);
-            clear_bit(X86_FEATURE_PAE, &edx);
             clear_bit(X86_FEATURE_PSE36, &edx);
         }
+#endif
 
         /* Unsupportable for virtualised CPUs. */
         ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
@@ -1005,11 +1024,11 @@
         v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
         if (old_base_mfn)
              put_page(mfn_to_page(old_base_mfn));
-        update_pagetables(v);
         /*
          * arch.shadow_table should now hold the next CR3 for shadow
          */
         v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+        update_pagetables(v);
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
         __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
     }
@@ -1400,11 +1419,16 @@
             v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
             if (old_base_mfn)
                 put_page(mfn_to_page(old_base_mfn));
-            update_pagetables(v);
             /*
              * arch.shadow_table should now hold the next CR3 for shadow
              */
+#if CONFIG_PAGING_LEVELS >= 3
+            if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
+                shadow_sync_all(v->domain);
+#endif
+
             v->arch.hvm_vmx.cpu_cr3 = value;
+            update_pagetables(v);
             HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
                         value);
             __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
@@ -1413,18 +1437,81 @@
     }
     case 4: /* CR4 */
     {
-        if (value & X86_CR4_PAE){
+        unsigned long old_cr4;
+
+        __vmread(CR4_READ_SHADOW, &old_cr4);
+
+        if ( value & X86_CR4_PAE && !(old_cr4 & X86_CR4_PAE) )
+        {
             set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
-        } else {
-            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
-                         &v->arch.hvm_vmx.cpu_state)){
+
+            if ( vmx_pgbit_test(v) ) 
+            {
+                /* The guest is 32 bit. */
+#if CONFIG_PAGING_LEVELS >= 4
+                unsigned long mfn, old_base_mfn;
+
+                if( !shadow_set_guest_paging_levels(v->domain, 3) )
+                {
+                    printk("Unsupported guest paging levels\n");
+                    domain_crash_synchronous(); /* need to take a clean path */
+                }
+
+                if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
+                                    v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
+                     !get_page(mfn_to_page(mfn), v->domain) ) 
+                {
+                    printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
+                    domain_crash_synchronous(); /* need to take a clean path */
+                }
+
+                old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+                if ( old_base_mfn )
+                    put_page(mfn_to_page(old_base_mfn));
+
+                /*
+                 * Now arch.guest_table points to machine physical.
+                 */
+
+                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+                update_pagetables(v);
+
+                HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
+                            (unsigned long) (mfn << PAGE_SHIFT));
+
+                __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+
+                /*
+                 * arch->shadow_table should hold the next CR3 for shadow
+                 */
+
+                HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
+                            v->arch.hvm_vmx.cpu_cr3, mfn);
+#endif
+            } 
+            else
+            {
+                /*  The guest is 64 bit. */
+#if CONFIG_PAGING_LEVELS >= 4
+                if ( !shadow_set_guest_paging_levels(v->domain, 4) ) 
+                {
+                    printk("Unsupported guest paging levels\n");
+                    domain_crash_synchronous(); /* need to take a clean path */
+                }
+#endif
+            }
+        }
+        else if ( value & X86_CR4_PAE )
+            set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
+        else
+        {
+            if ( test_bit(VMX_CPU_STATE_LMA_ENABLED, &v->arch.hvm_vmx.cpu_state) )
                 vmx_inject_exception(v, TRAP_gp_fault, 0);
-            }
+
             clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
         }
 
         __vmread(CR4_READ_SHADOW, &old_cr);
-
         __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
         __vmwrite(CR4_READ_SHADOW, value);
 
@@ -1432,9 +1519,9 @@
          * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
          * all TLB entries except global entries.
          */
-        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
+        if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
             shadow_sync_all(v->domain);
-        }
+
         break;
     }
     default:
diff -r 39f624f2b241 -r eaeb26494a39 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Sat Feb 18 10:41:42 2006
+++ b/xen/arch/x86/shadow.c     Sat Feb 18 10:56:13 2006
@@ -49,12 +49,12 @@
 
 #if CONFIG_PAGING_LEVELS == 3
 static unsigned long shadow_l3_table(
-    struct domain *d, unsigned long gpfn, unsigned long gmfn);
+    struct vcpu *v, unsigned long gpfn, unsigned long gmfn);
 #endif
 
 #if CONFIG_PAGING_LEVELS == 4
 static unsigned long shadow_l4_table(
-    struct domain *d, unsigned long gpfn, unsigned long gmfn);
+    struct vcpu *v, unsigned long gpfn, unsigned long gmfn);
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 3
@@ -62,6 +62,7 @@
     unsigned long va, unsigned int from, unsigned int to);
 static inline void validate_bl2e_change( struct domain *d,
     guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index);
+static void update_top_level_shadow(struct vcpu *v, unsigned long smfn);
 #endif
 
 /********
@@ -208,7 +209,7 @@
                   u32 psh_type)
 {
     struct page_info *page;
-    unsigned long smfn;
+    unsigned long smfn, real_gpfn;
     int pin = 0;
     void *l1, *lp;
 
@@ -327,7 +328,8 @@
         break;
 
     case PGT_l4_shadow:
-        if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
+        real_gpfn = gpfn & PGT_mfn_mask;
+        if ( !shadow_promote(d, real_gpfn, gmfn, psh_type) )
             goto fail;
         perfc_incr(shadow_l4_pages);
         d->arch.shadow_page_count++;
@@ -471,10 +473,11 @@
  * Might be worth investigating...
  */
 static unsigned long shadow_l2_table(
-    struct domain *d, unsigned long gpfn, unsigned long gmfn)
+    struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
 {
     unsigned long smfn;
     l2_pgentry_t *spl2e;
+    struct domain *d = v->domain;
     int i;
 
     SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
@@ -851,8 +854,17 @@
                 __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i);
             }
         }
-        if ( i < PAGING_L4 )
-            shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
+        if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) 
+        {
+            if ( i < PAGING_L3 )
+                shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
+        }
+        else 
+        {
+            if ( i < PAGING_L4 )
+                shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
+        }
+
         sle_up = sle;
     }
 
@@ -1187,6 +1199,8 @@
         unsigned long gmfn;
         unsigned long gpfn;
         int i;
+        unsigned int base_idx = 0;
+        base_idx = get_cr3_idxval(v);
 
         gmfn = l2mfn;
         gpfn = l2pfn;
@@ -1200,7 +1214,7 @@
 
             if ( page_out_of_sync(mfn_to_page(gmfn)) &&
                  !snapshot_entry_matches(
-                     d, guest_pt, gpfn, table_offset_64(va, i)) )
+                     d, guest_pt, gpfn, guest_table_offset_64(va, i, base_idx)) )
             {
                 unmap_and_return (1);
             }
@@ -1481,6 +1495,74 @@
     return 0;
 }
 
+static void resync_pae_guest_l3(struct domain *d)
+{
+    struct out_of_sync_entry *entry;
+    unsigned long i, idx;
+    unsigned long smfn, gmfn;
+    pgentry_64_t *guest, *shadow_l3, *snapshot;
+    struct vcpu *v = current;
+    int max = -1;
+    int unshadow = 0;
+
+    
+    ASSERT( shadow_mode_external(d) );
+
+    gmfn = pagetable_get_pfn(v->arch.guest_table);
+           
+    for ( entry = d->arch.out_of_sync; entry; entry = entry->next ) 
+    {
+        if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
+            continue;
+        if ( entry->gmfn != gmfn )
+            continue;
+
+        idx = get_cr3_idxval(v);
+        smfn = __shadow_status(
+            d, ((unsigned long)(idx << PGT_score_shift) | entry->gpfn), PGT_l4_shadow);
+
+#ifndef NDEBUG
+        if ( !smfn ) 
+        {
+            BUG();
+        }
+#endif
+
+        guest    = (pgentry_64_t *)map_domain_page(entry->gmfn);
+        snapshot = (pgentry_64_t *)map_domain_page(entry->snapshot_mfn);
+        shadow_l3 = (pgentry_64_t *)map_domain_page(smfn);
+
+        for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ ) 
+        {
+            int index = i + idx * PAE_L3_PAGETABLE_ENTRIES;
+            if ( entry_has_changed(
+                    guest[index], snapshot[index], PAGE_FLAG_MASK) ) 
+            {
+                validate_entry_change(d, &guest[index],
+                                      &shadow_l3[i], PAGING_L3);
+            }
+            if ( entry_get_value(guest[index]) != 0 )
+                max = i;
+
+            if ( !(entry_get_flags(guest[index]) & _PAGE_PRESENT) &&
+                 unlikely(entry_get_value(guest[index]) != 0) &&
+                 !unshadow &&
+                 (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
+                unshadow = 1;
+
+        }
+        if ( max == -1 )
+            unshadow = 1;
+
+        unmap_domain_page(guest);
+        unmap_domain_page(snapshot);
+        unmap_domain_page(shadow_l3);
+
+        if ( unlikely(unshadow) )
+            shadow_unpin(smfn);
+        break;
+    }
+}
 
 static int resync_all(struct domain *d, u32 stype)
 {
@@ -1823,6 +1905,64 @@
     return need_flush;
 }
 
+#if CONFIG_PAGING_LEVELS == 2
+static int resync_all_levels_guest_page(struct domain *d)
+{
+    int need_flush = 0;
+
+    need_flush |= resync_all(d, PGT_l1_shadow); 
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
+         shadow_mode_translate(d) )
+    {
+        need_flush |= resync_all(d, PGT_hl2_shadow);
+    }
+    return need_flush;
+}
+#elif CONFIG_PAGING_LEVELS == 3
+static int resync_all_levels_guest_page(struct domain *d)
+{
+    int need_flush = 0;
+
+    need_flush |= resync_all(d, PGT_l1_shadow);
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) 
+        need_flush |= resync_all(d, PGT_l4_shadow);
+    else
+    {
+        need_flush |= resync_all(d, PGT_l2_shadow);
+        if ( shadow_mode_log_dirty(d) )
+        {
+            need_flush |= resync_all(d, PGT_l3_shadow);
+            need_flush |= resync_all(d, PGT_l4_shadow);
+        }
+        else
+            resync_pae_guest_l3(d);
+    }
+    
+    return need_flush;
+}
+#elif CONFIG_PAGING_LEVELS == 4
+static int resync_all_levels_guest_page(struct domain *d)
+{
+    int need_flush = 0;
+
+    need_flush |= resync_all(d, PGT_l1_shadow);
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
+        need_flush |= resync_all(d, PGT_l4_shadow);
+    else
+    {
+        need_flush |= resync_all(d, PGT_l2_shadow);
+        if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+            resync_pae_guest_l3(d);
+        else
+        {
+            need_flush |= resync_all(d, PGT_l3_shadow);
+            need_flush |= resync_all(d, PGT_l4_shadow);
+        }
+    }
+    return need_flush;
+}
+#endif
+
 static void sync_all(struct domain *d)
 {
     struct out_of_sync_entry *entry;
@@ -1869,29 +2009,7 @@
     /* Flush ourself later. */
     need_flush = 1;
 
-    /* Second, resync all L1 pages, then L2 pages, etc... */
-    need_flush |= resync_all(d, PGT_l1_shadow);
-
-#if CONFIG_PAGING_LEVELS == 2
-    if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
-         shadow_mode_translate(d) )  
-    {
-        need_flush |= resync_all(d, PGT_hl2_shadow);
-    }
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 3
-    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
-        need_flush |= resync_all(d, PGT_l4_shadow);
-    else
-        need_flush |= resync_all(d, PGT_l2_shadow);
-
-    if ( d->arch.ops->guest_paging_levels >= PAGING_L3 )
-    {
-        need_flush |= resync_all(d, PGT_l3_shadow);
-        need_flush |= resync_all(d, PGT_l4_shadow);
-    }
-#endif
+    need_flush |= resync_all_levels_guest_page(d);
 
     if ( need_flush && !unlikely(shadow_mode_external(d)) )
         local_flush_tlb();
@@ -2217,21 +2335,36 @@
         v->arch.guest_vtable = map_domain_page_global(gmfn);
     }
 
+#if CONFIG_PAGING_LEVELS >= 3
+    /*
+     * Handle 32-bit PAE enabled guest
+     */
+    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
+    {
+        u32 index = get_cr3_idxval(v);
+        gpfn = (index << PGT_score_shift) | gpfn;
+    }
+#endif
+
     /*
      *  arch.shadow_table
      */
     if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) 
     {
 #if CONFIG_PAGING_LEVELS == 2
-        smfn = shadow_l2_table(d, gpfn, gmfn);
+        smfn = shadow_l2_table(v, gpfn, gmfn);
 #elif CONFIG_PAGING_LEVELS == 3
-        smfn = shadow_l3_table(d, gpfn, gmfn);
+        smfn = shadow_l3_table(v, gpfn, gmfn);
 #elif CONFIG_PAGING_LEVELS == 4
-        smfn = shadow_l4_table(d, gpfn, gmfn);
+        smfn = shadow_l4_table(v, gpfn, gmfn);
 #endif
     }
     else
     {
+#if CONFIG_PAGING_LEVELS >= 3
+        if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
+            update_top_level_shadow(v, smfn);
+#endif
         /*
          *  move sync later in order to avoid this smfn been 
          *  unshadowed occasionally
@@ -2867,14 +3000,15 @@
 
 #if CONFIG_PAGING_LEVELS == 3
 static unsigned long shadow_l3_table(
-    struct domain *d, unsigned long gpfn, unsigned long gmfn)
+    struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
 {
     unsigned long smfn;
     l3_pgentry_t *spl3e;
+    struct domain *d = v->domain;
 
     perfc_incrc(shadow_l3_table_count);
 
-    SH_VVLOG("shadow_l4_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
+    SH_VVLOG("shadow_l3_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
 
     if ( SH_L1_HAS_NEXT_PAGE &&
          d->arch.ops->guest_paging_levels == PAGING_L2 )
@@ -2967,7 +3101,7 @@
 }
 #endif /* CONFIG_PAGING_LEVELS == 3 */
 
-#ifndef GUEST_PGENTRY_32
+#if (!defined(GUEST_PGENTRY_32) && !defined(GUEST_32PAE))
 static unsigned long gva_to_gpa_pae(unsigned long gva)
 {
     BUG();
@@ -2977,10 +3111,11 @@
 
 #if CONFIG_PAGING_LEVELS == 4
 static unsigned long shadow_l4_table(
-  struct domain *d, unsigned long gpfn, unsigned long gmfn)
+  struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
 {
     unsigned long smfn;
     l4_pgentry_t *spl4e;
+    struct domain *d = v->domain;
 
     SH_VVLOG("shadow_l4_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
 
@@ -2998,6 +3133,24 @@
     }
 
     spl4e = (l4_pgentry_t *)map_domain_page(smfn);
+
+    /* For 32-bit PAE guest on 64-bit host */
+    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
+    {
+        unsigned long index;
+        /*
+         * Shadow L4's pfn_info->tlbflush_timestamp
+         * should also save it's own index.
+         */
+        index = get_cr3_idxval(v);
+        frame_table[smfn].tlbflush_timestamp = index;
+
+        memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
+        /* Map the self entry */
+        spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
+        unmap_domain_page(spl4e);
+        return smfn;
+    }
 
     /* Install hypervisor and 4x linear p.t. mapings. */
     if ( (PGT_base_page_table == PGT_l4_page_table) &&
@@ -3041,6 +3194,21 @@
 #endif /* CONFIG_PAGING_LEVELS == 4 */
 
 #if CONFIG_PAGING_LEVELS >= 3
+static void 
+update_top_level_shadow(struct vcpu *v, unsigned long smfn)
+{
+    unsigned long index = get_cr3_idxval(v);
+    pgentry_64_t *sple = (pgentry_64_t *)map_domain_page(smfn);
+    pgentry_64_t *gple = (pgentry_64_t *)&v->arch.guest_vtable;
+    int i;
+
+    for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
+        validate_entry_change(
+            v->domain, &gple[index*4+i], &sple[i], PAGING_L3);
+
+    unmap_domain_page(sple);
+}
+
 /*
  * validate_bl2e_change()
  * The code is for 32-bit HVM guest on 64-bit host.
@@ -3410,6 +3578,8 @@
     pgentry_64_t gle = { 0 };
     unsigned long gpfn = 0, mfn;
     int i;
+    unsigned int base_idx = 0;
+    base_idx = get_cr3_idxval(v);
 
     ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
 
@@ -3438,7 +3608,10 @@
 #if CONFIG_PAGING_LEVELS >= 3
     if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) 
     {
-        gpfn = pagetable_get_pfn(v->arch.guest_table);
+        if ( SH_GUEST_32PAE )
+            gpfn = hvm_get_guest_ctrl_reg(v, 3);
+        else
+            gpfn = pagetable_get_pfn(v->arch.guest_table);
     }
 #endif
 
@@ -3451,7 +3624,8 @@
         mfn = gmfn_to_mfn(d, gpfn);
 
         lva = (pgentry_64_t *) map_domain_page(mfn);
-        gle = lva[table_offset_64(va, i)];
+        gle = lva[guest_table_offset_64(va, i, base_idx)];
+
         unmap_domain_page(lva);
 
         gpfn = entry_get_pfn(gle);
@@ -3695,7 +3869,7 @@
  * The naming convention of the shadow_ops:
  * MODE_<pgentry size>_<guest paging levels>_HANDLER
  */
-#ifndef GUEST_PGENTRY_32
+#if (!defined(GUEST_PGENTRY_32) && !defined(GUEST_32PAE))
 struct shadow_ops MODE_64_3_HANDLER = {
     .guest_paging_levels        = 3,
     .invlpg                     = shadow_invlpg_64,
@@ -3741,7 +3915,7 @@
 #endif
 
 #if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) ||  \
-    ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) )
+    ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) ) 
 
 
 /* 
diff -r 39f624f2b241 -r eaeb26494a39 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Sat Feb 18 10:41:42 2006
+++ b/xen/arch/x86/shadow_public.c      Sat Feb 18 10:56:13 2006
@@ -92,7 +92,7 @@
 /****************************************************************************/
 /************* export interface functions ***********************************/
 /****************************************************************************/
-
+void free_shadow_pages(struct domain *d);
 
 int shadow_set_guest_paging_levels(struct domain *d, int levels)
 {
@@ -106,10 +106,19 @@
         shadow_unlock(d);
         return 1;
 #endif
-#if CONFIG_PAGING_LEVELS >= 3
+#if CONFIG_PAGING_LEVELS == 3
     case 3:
         if ( d->arch.ops != &MODE_64_3_HANDLER )
             d->arch.ops = &MODE_64_3_HANDLER;
+        shadow_unlock(d);
+        return 1;
+#endif
+#if CONFIG_PAGING_LEVELS == 4
+    case 3:
+        if ( d->arch.ops == &MODE_64_2_HANDLER )
+            free_shadow_pages(d);
+        if ( d->arch.ops != &MODE_64_PAE_HANDLER )
+            d->arch.ops = &MODE_64_PAE_HANDLER;
         shadow_unlock(d);
         return 1;
 #endif
@@ -239,9 +248,19 @@
          */
         if ( external )
         {
-            for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
+            for ( i = 0; i < PAGETABLE_ENTRIES; i++ ) {
                 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
                     put_shadow_ref(entry_get_pfn(ple[i]));
+                if (d->arch.ops->guest_paging_levels == PAGING_L3)
+                {
+#if CONFIG_PAGING_LEVELS == 4
+                    if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L4 )
+#elif CONFIG_PAGING_LEVELS == 3
+                    if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L3 )
+#endif
+                        break;
+                }
+            }
         } 
         else
         {
@@ -622,7 +641,7 @@
     SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
 
     ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
-#if CONFIG_PAGING_LEVELS >=4
+#if CONFIG_PAGING_LEVELS >= 4
     if ( type == PGT_fl1_shadow ) 
     {
         unsigned long mfn;
@@ -630,6 +649,10 @@
         if ( !mfn )
             gpfn |= (1UL << 63);
     }
+    if (d->arch.ops->guest_paging_levels == PAGING_L3)
+        if (type == PGT_l4_shadow ) {
+        gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_score_shift) | gpfn;
+        }
 #endif
 
     delete_shadow_status(d, gpfn, gmfn, type);
@@ -661,6 +684,7 @@
     case PGT_l2_shadow:
     case PGT_l3_shadow:
     case PGT_l4_shadow:
+        gpfn = gpfn & PGT_mfn_mask;
         shadow_demote(d, gpfn, gmfn);
         free_shadow_tables(d, smfn, shadow_type_to_level(type));
         d->arch.shadow_page_count--;
diff -r 39f624f2b241 -r eaeb26494a39 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Sat Feb 18 10:41:42 2006
+++ b/xen/common/page_alloc.c   Sat Feb 18 10:56:13 2006
@@ -539,6 +539,7 @@
         pg[i].count_info        = 0;
         pg[i].u.inuse._domain   = 0;
         pg[i].u.inuse.type_info = 0;
+        page_set_owner(&pg[i], NULL);
     }
 
     if ( unlikely(!cpus_empty(mask)) )
diff -r 39f624f2b241 -r eaeb26494a39 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Sat Feb 18 10:41:42 2006
+++ b/xen/include/asm-x86/hvm/hvm.h     Sat Feb 18 10:56:13 2006
@@ -61,10 +61,12 @@
      * 1) determine whether the guest is in real or vm8086 mode,
      * 2) determine whether paging is enabled,
      * 3) return the length of the instruction that caused an exit.
+     * 4) return the current guest control-register value
      */
     int (*realmode)(struct vcpu *v);
     int (*paging_enabled)(struct vcpu *v);
     int (*instruction_length)(struct vcpu *v);
+    unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
 };
 
 extern struct hvm_function_table hvm_funcs;
@@ -163,4 +165,12 @@
 {
     return hvm_funcs.instruction_length(v);
 }
+
+static inline unsigned long
+hvm_get_guest_ctrl_reg(struct vcpu *v, unsigned int num)
+{
+    if ( hvm_funcs.get_guest_ctrl_reg )
+        return hvm_funcs.get_guest_ctrl_reg(v, num);
+    return 0;                   /* force to fail */
+}
 #endif /* __ASM_X86_HVM_HVM_H__ */
diff -r 39f624f2b241 -r eaeb26494a39 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Sat Feb 18 10:41:42 2006
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Sat Feb 18 10:56:13 2006
@@ -410,6 +410,14 @@
     return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
 }
 
+static inline int vmx_pgbit_test(struct vcpu *v)
+{
+    unsigned long cr0;
+
+    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
+    return (cr0 & X86_CR0_PG);
+}
+
 static inline int __vmx_inject_exception(struct vcpu *v, int trap, int type, 
                                          int error_code)
 {
diff -r 39f624f2b241 -r eaeb26494a39 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Sat Feb 18 10:41:42 2006
+++ b/xen/include/asm-x86/shadow_64.h   Sat Feb 18 10:56:13 2006
@@ -28,6 +28,7 @@
 #define _XEN_SHADOW_64_H
 #include <asm/shadow.h>
 #include <asm/shadow_ops.h>
+#include <asm/hvm/hvm.h>
 
 /*
  * The naming convention of the shadow_ops:
@@ -37,6 +38,7 @@
 extern struct shadow_ops MODE_64_3_HANDLER;
 #if CONFIG_PAGING_LEVELS == 4
 extern struct shadow_ops MODE_64_4_HANDLER;
+extern struct shadow_ops MODE_64_PAE_HANDLER;
 #endif
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -106,6 +108,15 @@
 #define PAE_SHADOW_SELF_ENTRY   259
 #define PAE_L3_PAGETABLE_ENTRIES   4
 
+/******************************************************************************/
+/*
+ * The macro and inlines are for 32-bit PAE guest on 64-bit host
+ */
+#define PAE_CR3_ALIGN       5
+#define PAE_CR3_IDX_MASK    0x7f
+#define PAE_CR3_IDX_NO      128
+
+/******************************************************************************/
 static inline int  table_offset_64(unsigned long va, int level)
 {
     switch(level) {
@@ -122,10 +133,15 @@
 
 #if CONFIG_PAGING_LEVELS >= 4
 #ifndef GUEST_PGENTRY_32
+#ifndef GUEST_32PAE
         case 4:
             return  (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1));
 #else
         case 4:
+            return PAE_SHADOW_SELF_ENTRY;
+#endif
+#else
+        case 4:
             return PAE_SHADOW_SELF_ENTRY; 
 #endif
 #endif
@@ -133,6 +149,55 @@
             return -1;
     }
 }
+
+/*****************************************************************************/
+
+#if defined( GUEST_32PAE )
+static inline int guest_table_offset_64(unsigned long va, int level, unsigned int index)
+{
+    switch(level) {
+        case 1:
+            return  (((va) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1));
+        case 2:
+            return  (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1));
+        case 3:
+            return  (index * 4 + ((va) >> L3_PAGETABLE_SHIFT));
+#if CONFIG_PAGING_LEVELS == 3
+        case 4:
+            return PAE_SHADOW_SELF_ENTRY;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+#ifndef GUEST_PGENTRY_32
+        case 4:
+            return  (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1));
+#else
+        case 4:
+            return PAE_SHADOW_SELF_ENTRY;
+#endif
+#endif
+        default:
+            return -1;
+    }
+}
+
+static inline unsigned long get_cr3_idxval(struct vcpu *v)
+{
+    unsigned long pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
+
+    return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
+}
+
+
+#define SH_GUEST_32PAE 1
+#else 
+#define guest_table_offset_64(va, level, index) \
+            table_offset_64((va),(level))
+#define get_cr3_idxval(v) 0
+#define SH_GUEST_32PAE 0
+#endif
+
+/********************************************************************************/
 
 static inline void free_out_of_sync_state(struct domain *d)
 {
@@ -163,6 +228,9 @@
     u32 level = flag & L_MASK;
     struct domain *d = v->domain;
     int root_level;
+    unsigned int base_idx;
+
+    base_idx = get_cr3_idxval(v);
 
     if ( flag & SHADOW_ENTRY )
     {
@@ -173,7 +241,10 @@
     else if ( flag & GUEST_ENTRY )
     {
         root_level = v->domain->arch.ops->guest_paging_levels;
-        index = table_offset_64(va, root_level);
+        if ( root_level == PAGING_L3 )
+            index = guest_table_offset_64(va, PAGING_L3, base_idx);
+        else
+            index = guest_table_offset_64(va, root_level, base_idx);
         le_e = (pgentry_64_t *)&v->arch.guest_vtable[index];
     }
     else /* direct mode */
@@ -199,7 +270,10 @@
         if ( le_p )
             unmap_domain_page(le_p);
         le_p = (pgentry_64_t *)map_domain_page(mfn);
-        index = table_offset_64(va, (level + i - 1));
+        if ( flag & SHADOW_ENTRY )
+            index = table_offset_64(va, (level + i - 1));
+        else
+            index = guest_table_offset_64(va, (level + i - 1), base_idx);
         le_e = &le_p[index];
     }
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog